/*
The contents of this file are subject to the Mozilla Public License Version 1.1
(the "License"); you may not use this file except in compliance with the
License. You may obtain a copy of the License at http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
the specific language governing rights and limitations under the License.

The Original Code is comprised of the ADF directory

The Initial Developer of the Original Code is
PaperThin, Inc.  Copyright (c) 2009-2025.
All Rights Reserved.

By downloading, modifying, distributing, using and/or accessing any files
in this directory, you agree to the terms and conditions of the applicable
end user license agreement.
*/

/* *************************************************************** */
/*
Author:
	PaperThin, Inc.
Name:
	data_2_1.cfc
Summary:
	Data Utils component functions for the ADF Library
Version:
	2.1
History:
	2023-03-02 - GAC - Created
	2024-06-25 - GAC - Added calcStartEndRows(), symbolsToASCII(), and convertTagAttrsSingleToDoubleQuotes() methods
   2024-07-09 - GAC - Added listToEmptyStruct(), generateRandomString(), and removeDuplicatesFromArray() functions
   2024-11-20 - GAC - Added the parseUrl() function
   2025-01-28 - GAC - Added a new CS deepCopy() wrapper function
*/
component displayname="data_2_1" extends="data_2_0" hint="Data Utils component functions for the ADF Library" output="no"
{
    /* PROPERTIES */
	property name="version" type="string" default="2_1_4";
	property name="type" value="singleton";
	property name="wikiTitle" value="Data_2_1";

   /*
      Name:
         parseHTMLTag(tag)
      Summary:
         I split an HTML tag string into a structure with keys:
         NAME (Tag Name, upper-cased), HTML (Original HTML Tag, trimmed), ATTRIBS (struct of tag attributes)

         Handles:
            - tags with or without attributes (<br>, <br/>, <div class="x">)
            - double-quoted, single-quoted and unquoted attribute values
            - value-less attributes such as disabled / readonly (stored as an empty string)
            - hyphenated / namespaced attribute names such as data-*, aria-* and xml:lang

         NOTE: The attribute pattern is run against the whole string that was passed in,
         so pass a single opening tag rather than a full element with inner text.

         Based on article by Ben Nadel
         https://www.bennadel.com/blog/779-parsing-html-tag-data-into-a-coldfusion-structure.htm
      Arguments:
         string - tag
      Returns:
         Struct
      History:
         2023-03-03 - GAC - Added
                          - Updated RegEx patterns to fix issues with tagName matching and not handling data-* attributes properly
   */
   public struct function parseHTMLTag(required string tag) {
      var tagData = {};
      var patternClass = CreateObject("java", "java.util.regex.Pattern");

      // Tag name: the first "<name" in the string. Attributes are optional, so
      // <br>, <br/> and <div class="x"> all yield a name.
      var nameRegEx = "<(\w[\w:\-]*)[^>]*>";

      // Attribute: whitespace, a NON-EMPTY name, then an optional value that is
      // double-quoted (group 2), single-quoted (group 3) or unquoted (group 4).
      // Capturing the value without its quotes means no quote stripping is needed afterwards.
      var attrRegEx = "\s+([\w:\-]+)(?:\s*=\s*(?:""([^""]*)""|'([^']*)'|([^\s>]*)))?";

      var nameMatcher = patternClass.compile(nameRegEx).matcher(arguments.tag);
      var attrMatcher = patternClass.compile(attrRegEx).matcher(arguments.tag);
      var attrName = "";
      var grp = 0;
      // Holder struct: a Java null returned by group() may leave the key undefined,
      // so the value is always checked through this struct before it is used.
      var match = {};

      tagData.HTML = TRIM(arguments.tag);
      tagData.Name = "";
      tagData.Attribs = {};

      // There can only be ONE tag name, so a single find() is enough.
      if ( nameMatcher.find() ) {
         tagData.Name = UCase(TRIM(nameMatcher.group(JavaCast('int',1))));
      }

      // Keep looping over the attributes while we have more to match.
      while ( attrMatcher.find() ) {
         attrName = attrMatcher.group(JavaCast('int',1));

         // Default to the empty string; this is what value-less attributes keep.
         tagData.Attribs[ attrName ] = "";

         // At most one of the three value groups participates in a match;
         // the others come back as NULL and are skipped.
         for ( grp = 2; grp LTE 4; grp++ ) {
            match.value = attrMatcher.group(JavaCast('int',grp));

            if ( StructKeyExists(match,"value") AND NOT IsNull(match.value) ) {
               tagData.Attribs[ attrName ] = match.value;
               break;
            }
         }
      }

      return tagData;
   }

   /*
      Name:
         calcStartEndRows(total,start,qty)
      Summary:
         Returns a structure with the start, end, rows and total based on the provided Total, Start and Qty parameters

         - start falls back to 1 when it is not within 2..total
         - a non-numeric qty (ex. "all"), or a qty that is not less than total, means "through the last row"
         - end is never greater than total (a short last page is reported with its real row count)
         - end is never less than start
      Arguments:
         numeric - total
         numeric - start
         string - qty
      Returns:
         Struct - keys: start, end, rows, total
      History:
         2023-06-22 - GAC - Added
         2024-01-21 - GAC - Added the rows and total to the return struct
   */
   public struct function calcStartEndRows(required numeric total, numeric start=1, string qty="all") {
      var startRow = 1;
      var endRow = arguments.total;
      var totalCnt = arguments.total;
      var totalRows = arguments.total;

      // Only accept a start row that actually exists in the record set
      if ( arguments.start GT startRow AND arguments.start LTE arguments.total ) {
         startRow = arguments.start;
      }

      // A numeric qty smaller than the total limits the window size
      if ( IsNumeric(arguments.qty) AND arguments.qty LT arguments.total ) {
         endRow = startRow + arguments.qty - 1;
      }

      // The window must not run past the last available row
      // (ex. total=10, start=8, qty=5 ends at row 10, not row 12)
      if ( endRow GT arguments.total ) {
         endRow = arguments.total;
      }

      // Keep the original lower bound: end is never before start
      if ( endRow LT startRow ) {
         endRow = startRow;
      }

      totalRows = endRow - startRow + 1;

      return { 'start': startRow, 'end': endRow, 'rows': totalRows, 'total': totalCnt  };
   }

   /*
      Name:
         symbolsToASCII(inputString)
      Summary:
         Map Symbols & Punctuation to ASCII (20200604)
         Converting the Unicode punctuation and symbols to ASCII punctuation and symbols is imperative in Natural language processing (NLP) for preserving the original documents.
         Based on mapping from Lexical Systems Group: https://lexsrv3.nlm.nih.gov/LexSysGroup/Projects/lvg/2013/docs/designDoc/UDF/unicode/NormOperations/mapSymbolToAscii.html
         Blog: https://dev.to/gamesover/convert-symbols-punctuation-to-ascii-using-coldfusion-java-3l6a
         TryCF: https://trycf.com/gist/6f35220d47caa7fdbf75eb884ff1cec7

         Each line below is a java.lang.String.replaceAll() call: the first argument is a
         Java regex (the \uXXXX escapes are resolved by the Java regex engine, not by CFML)
         and the second argument is a Java regex REPLACEMENT string, in which the
         backslash and dollar sign are special characters.
      Arguments:
         string - inputString
      Returns:
         String
      History:
         2023-11-08 - GAC - Added
   */
   string function symbolsToASCII(required string inputString)
   {
      var retStr = javacast("string", arguments.inputString);
      retStr = retStr.replaceAll("[\u00B4\u02B9\u02BC\u02C8\u0301\u2018\u2019\u201B\u2032\u2034\u2037]", chr(39)); /* apostrophe (') */
      retStr = retStr.replaceAll("[\u00AB\u00BB\u02BA\u030B\u030E\u201C\u201D\u201E\u201F\u2033\u2036\u3003\u301D\u301E]", chr(34)); /* quotation mark (") */
      retStr = retStr.replaceAll("[\u00AD\u2010\u2011\u2012\u2013\u2014\u2212\u2015]", chr(45)); /* hyphen (-) */
      retStr = retStr.replaceAll("[\u01C3\u2762]", chr(33)); /* exclamation mark (!) */
      retStr = retStr.replaceAll("[\u266F]", chr(35)); /* music sharp sign (#) */
      retStr = retStr.replaceAll("[\u066A\u2052]", chr(37)); /* percent sign (%) */
      retStr = retStr.replaceAll("[\u066D\u204E\u2217\u2731\u00D7]", chr(42)); /* asterisk (*) */
      retStr = retStr.replaceAll("[\u201A\uFE51\uFF64\u3001]", chr(44)); /* comma (,) */
      retStr = retStr.replaceAll("[\u00F7\u0338\u2044\u2215]", chr(47)); /* slash (/) */
      retStr = retStr.replaceAll("[\u0589\u05C3\u2236]", chr(58)); /* colon (:) */
      retStr = retStr.replaceAll("[\u203D]", chr(63)); /* question mark (?) */
      retStr = retStr.replaceAll("[\u27E6]", chr(91)); /* opening square bracket ([) */
      // A single backslash is an incomplete escape in a Java replacement string and throws
      // an IllegalArgumentException on the first match; two backslashes emit one literal backslash.
      retStr = retStr.replaceAll("[\u20E5\u2216]", "#chr(92)##chr(92)#"); /* backslash (\) */
      retStr = retStr.replaceAll("[\u301B]", chr(93));  /* closing square bracket (]) */
      retStr = retStr.replaceAll("[\u02C4\u02C6\u0302\u2038\u2303]", chr(94)); /* caret (^) */
      retStr = retStr.replaceAll("[\u02CD\u0331\u0332\u2017]", chr(95)); /* underscore (_) */
      retStr = retStr.replaceAll("[\u02CB\u0300\u2035]", chr(96)); /* grave accent (`) */
      retStr = retStr.replaceAll("[\u2983]", chr(123)); /* opening curly bracket ({) */
      retStr = retStr.replaceAll("[\u01C0\u05C0\u2223\u2758]", chr(124)); /* vertical bar / pipe (|) */
      retStr = retStr.replaceAll("[\u2016]", "#chr(124)##chr(124)#"); /* double vertical bar / double pipe (||) */
      retStr = retStr.replaceAll("[\u02DC\u0303\u2053\u223C\u301C]", chr(126)); /* tilde (~) */
      retStr = retStr.replaceAll("[\u2039\u2329\u27E8\u3008]", chr(60)); /* less-than sign (<) */
      retStr = retStr.replaceAll("[\u2264\u2266]", "#chr(60)##chr(61)#"); /* less-than equal-to sign (<=) */
      retStr = retStr.replaceAll("[\u203A\u232A\u27E9\u3009]", chr(62)); /* greater-than sign (>) */
      retStr = retStr.replaceAll("[\u2265\u2267]", "#chr(62)##chr(61)#"); /* greater-than equal-to sign (>=) */
      retStr = retStr.replaceAll("[\u200B\u2060\uFEFF]", chr(32)); /* space ( ) */
      /* vulgar fractions */
      retStr = retStr.replaceAll("\u2153", "1/3");
      retStr = retStr.replaceAll("\u2154", "2/3");
      retStr = retStr.replaceAll("\u2155", "1/5");
      retStr = retStr.replaceAll("\u2156", "2/5");
      retStr = retStr.replaceAll("\u2157", "3/5");
      retStr = retStr.replaceAll("\u2158", "4/5");
      retStr = retStr.replaceAll("\u2159", "1/6");
      retStr = retStr.replaceAll("\u215A", "5/6");
      retStr = retStr.replaceAll("\u215B", "1/8");
      retStr = retStr.replaceAll("\u215C", "3/8");
      retStr = retStr.replaceAll("\u215D", "5/8");
      retStr = retStr.replaceAll("\u215E", "7/8");
      /* horizontal ellipsis; the escaped dots in the replacement resolve to three literal dots */
      retStr = retStr.replaceAll("\u2026", "\.\.\.");
      return retStr;
   }

   /*
      Name:
         convertTagAttrsSingleToDoubleQuotes(inputStr)
      Summary:
         Converts single-quoted attribute values ( ='value' ) to double-quoted values ( ="value" ).
         Double quotes found INSIDE a converted value are swapped to single quotes so the
         resulting attribute stays well formed ( title='say "hi"' becomes title="say 'hi'" ).

         NOTE: The pattern matches every ='...' sequence in the string; it does not check
         that the sequence is actually inside an HTML tag.

         The string is processed in ONE left-to-right pass, so text that was already
         converted is never scanned or replaced a second time.
      Arguments:
         string - inputStr
      Returns:
         String
      History:
         2023-11-08 - GAC - Created
         2024-09-06 - GAC - Updated the function and arguments definitions
   */
   string function convertTagAttrsSingleToDoubleQuotes(string inputStr="")
   {
      // group 1 = the attribute value without its surrounding single quotes
      var attrPattern = createObject("java", "java.util.regex.Pattern").compile("='([^']*)'");
      // class reference only (no init) - used for the static quoteReplacement() method
      var matcherClass = createObject("java", "java.util.regex.Matcher");
      var attrMatcher = attrPattern.matcher( javaCast("string", arguments.inputStr) );
      var outBuffer = createObject("java", "java.lang.StringBuffer").init();
      var newAttr = "";

      while ( attrMatcher.find() ) {
         // swap inner double quotes to single quotes, then wrap the value in double quotes
         newAttr = '="' & replace( attrMatcher.group( javaCast("int", 1) ), '"', "'", "all" ) & '"';

         // quoteReplacement() keeps any "\" or "$" in the value from being treated
         // as a regex replacement escape / group reference
         attrMatcher.appendReplacement( outBuffer, matcherClass.quoteReplacement( newAttr ) );
      }

      // copy whatever follows the last match (or the whole string when nothing matched)
      attrMatcher.appendTail( outBuffer );

      return outBuffer.toString();
   }

   /*
      Name:
         listToEmptyStruct(list)

      Summary:
         Builds a structure whose keys are the elements of the given comma-delimited
         list; every key is assigned an empty string as its value.

      Arguments:
         string - list

      Returns:
         Struct

      History:
         2024-07-09 - GAC - Created
   */
   struct function listToEmptyStruct(required string list)
   {
      var emptyStruct = {};
      var keyArr = ListToArray(arguments.list);
      var keyName = "";

      // one struct key per list element
      for ( keyName in keyArr ) {
         emptyStruct[keyName] = '';
      }

      return emptyStruct;
   }

   /*
      Name:
         generateRandomString(length,chars)

      Summary:
         Creates a random string of a desired length, using a pre-defined character list.
         Characters are picked with RandRange() using the SHA1PRNG algorithm.

         'chars' may be passed as a plain string ("ABC123") or as a comma-delimited
         list ("A,B,C,1,2,3"); commas are removed before the characters are used.
         An empty string is returned when length is less than 1 or when no usable
         characters remain after the commas are removed.

         Based on:
         https://lunaticthinker.me/index.php/cfml-generating-random-string/
      Arguments:
         length - integer
         chars - string
      Returns:
         String
      History:
         2024-07-29 - GAC - Added
   */
   public string function generateRandomString(required numeric length, string chars="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
   {
        var retStr = "";
        // remove commas if 'chars' is passed as a comma-delimited list
        var charPool = replace(arguments.chars, ",", "", "all");
        // measured once; the pool does not change inside the loop
        var poolSize = len(charPool);
        var i = 0;

        // nothing to pick from: RandRange(1,0) / Mid() on an empty string would fail
        if ( poolSize EQ 0 ) {
            return retStr;
        }

        for ( i = 1; i <= arguments.length; i++ ) {
            retStr &= Mid(charPool, RandRange(1, poolSize, "SHA1PRNG"), 1);
        }
        return retStr;
   }

   /*
      Name:
         removeDuplicatesFromArray(input,sortArray,sortType,sortDirection)

      Summary:
         Removes duplicates from an array (with extra parameters for sorting the result array).
         The first occurrence of each value is kept and, unless sorting is requested,
         the original order of the values is preserved (java.util.LinkedHashSet).

         Based on:
         https://adampresley.github.io/2009/10/22/removing-duplicate-array-items-in-coldfusion.html
      Arguments:
         input - array
         sortArray - boolean - when true the result is sorted with arraySort()
         sortType - string - arraySort() sort type (default: textnocase)
         sortDirection - string - arraySort() sort order (default: asc)
      Returns:
         Array
      History:
         2024-07-29 - GAC - Added
   */
   public array function removeDuplicatesFromArray(required array input,boolean sortArray="false",string sortType="textnocase",string sortDirection="asc")
   {
      // NOTE: the declared return type is 'array' because an array is what is returned;
      // a 'string' return type fails the return type validation of the function.
      var list = arguments.input.subList(0, arguments.input.size());
      // a LinkedHashSet drops duplicates while keeping the insertion order
      var set = createObject("java", "java.util.LinkedHashSet").init(list);
      var result = [];

      result.addAll(set);

      if (arguments.sortArray) {
         arraySort(result, arguments.sortType, arguments.sortDirection);
      }

      return result;
   }

   /*
      Name:
         parseUrl(sUrl)

      Summary:
         Parses a Url and returns a struct with keys defining the information in the Uri.

         Returned keys:
            scheme, authority, path, directory, file, query, fragment,
            domain, port, username, password,
            params.segment (struct of ";" path parameters),
            params.url (struct of query string parameters)

         Parameter values are URL decoded. When a parameter name is repeated, its
         values are combined into a comma-delimited list. Everything after the first
         "=" of a pair is treated as the value, so values may themselves contain "=".

         https://cflib.org/udf/parseUrl
            @param sURL      String to parse. (Required)
            @return Returns a struct.
            @author Dan G. Switzer, II (dswitzer@pengoworks.com)
            @version 1, January 10, 2007
      Arguments:
         sUrl - string
      Returns:
         Struct
      History:
         2024-11-20 - GAC - Added
   */
   public struct function parseUrl(required string sUrl){
       // var to hold the final structure
       var stUrlInfo = structNew();
       // position used to step through the path segments
       var i = 1;
       // the path with any ";" parameters removed
       var sPath = "";
       // length of the file name found at the end of the path
       var iFileLen = 0;
       /*
           from: http://www.ietf.org/rfc/rfc2396.txt
           ^((([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*)))?
            123            4  5          6       7  8        9 A

               scheme    = $3
               authority = $5
               path      = $6
               query     = $8
               fragment  = $10 (A)

           NOTE: the numbers above are indexes into the pos/len arrays returned by
           reFind(), where index 1 is the whole match and index N+1 is sub-expression N.
       */
       var sUriRegEx = "^(([^:/?##]+):)?(//([^/?##]*))?([^?##]*)(\?([^##]*))?(##(.*))?";
       /*
           separates the authority into user info, domain and port
           ^((([^@:]+)(:([^@]+))?@)?([^:]*)?(:(.*)))?
            123       4 5           6       7 8

               username  = $3
               password  = $5
               domain    = $6
               port      = $8
       */
       var sAuthRegEx = "^(([^@:]+)(:([^@]+))?@)?([^:]*)?(:(.*))?";
       /*
           separates the path into segments & parameters
           ((/?[^;/]+)(;([^/]+))?)
           12         3 4

               segment     = $1
               path        = $2
               parameters  = $4
       */
       var sSegRegEx = "(/?[^;/]+)(;([^/]+))?";

       // parse the url looking for info
       var stUriInfo = reFindNoCase(sUriRegEx, arguments.sUrl, 1, true);
       // this is for the authority section
       var stAuthInfo = "";
       // this is for the segments in the path
       var stSegInfo = "";

       // create empty keys
       stUrlInfo["scheme"] = "";
       stUrlInfo["authority"] = "";
       stUrlInfo["path"] = "";
       stUrlInfo["directory"] = "";
       stUrlInfo["file"] = "";
       stUrlInfo["query"] = "";
       stUrlInfo["fragment"] = "";
       stUrlInfo["domain"] = "";
       stUrlInfo["port"] = "";
       stUrlInfo["username"] = "";
       stUrlInfo["password"] = "";
       stUrlInfo["params"] = structNew();

       // only read the sub-expressions when the regex engine returned all of them
       if( arrayLen(stUriInfo.len) gte 10 ){
           // get the scheme
           if( stUriInfo.len[3] gt 0 ) stUrlInfo["scheme"] = mid(arguments.sUrl, stUriInfo.pos[3], stUriInfo.len[3]);
           // get the authority
           if( stUriInfo.len[5] gt 0 ) stUrlInfo["authority"] = mid(arguments.sUrl, stUriInfo.pos[5], stUriInfo.len[5]);
           // get the path
           if( stUriInfo.len[6] gt 0 ) stUrlInfo["path"] = mid(arguments.sUrl, stUriInfo.pos[6], stUriInfo.len[6]);
           // get the query
           if( stUriInfo.len[8] gt 0 ) stUrlInfo["query"] = mid(arguments.sUrl, stUriInfo.pos[8], stUriInfo.len[8]);
           // get the fragment
           if( stUriInfo.len[10] gt 0 ) stUrlInfo["fragment"] = mid(arguments.sUrl, stUriInfo.pos[10], stUriInfo.len[10]);
       }

       // break authority into user info, domain and ports
       if( len(stUrlInfo["authority"]) gt 0 ){
           // parse the authority looking for info
           stAuthInfo = reFindNoCase(sAuthRegEx, stUrlInfo["authority"], 1, true);

           if( arrayLen(stAuthInfo.len) gte 8 ){
               // get the domain
               if( stAuthInfo.len[6] gt 0 ) stUrlInfo["domain"] = mid(stUrlInfo["authority"], stAuthInfo.pos[6], stAuthInfo.len[6]);
               // get the port
               if( stAuthInfo.len[8] gt 0 ) stUrlInfo["port"] = mid(stUrlInfo["authority"], stAuthInfo.pos[8], stAuthInfo.len[8]);
               // get the username
               if( stAuthInfo.len[3] gt 0 ) stUrlInfo["username"] = mid(stUrlInfo["authority"], stAuthInfo.pos[3], stAuthInfo.len[3]);
               // get the password
               if( stAuthInfo.len[5] gt 0 ) stUrlInfo["password"] = mid(stUrlInfo["authority"], stAuthInfo.pos[5], stAuthInfo.len[5]);
           }
       }

       // the path (";") parameters in struct form
       stUrlInfo["params"]["segment"] = structNew();

       // if the path contains any parameters, we need to parse them out
       if( find(";", stUrlInfo["path"]) gt 0 ){
           // this is for the segments in the path
           stSegInfo = reFindNoCase(sSegRegEx, stUrlInfo["path"], 1, true);

           // loop through all the segments and build the strings
           while( stSegInfo.pos[1] gt 0 ){
               // build the path, excluding parameters
               sPath = sPath & mid(stUrlInfo["path"], stSegInfo.pos[2], stSegInfo.len[2]);

               // if there are some parameters in this segment, add them to the struct
               if( stSegInfo.len[4] gt 0 ){
                   parseUrlAddParams(
                       stParams = stUrlInfo["params"]["segment"],
                       sPairs = mid(stUrlInfo["path"], stSegInfo.pos[4], stSegInfo.len[4]),
                       sDelim = ";"
                   );
               }

               // get the ending position
               i = stSegInfo.pos[1] + stSegInfo.len[1];

               // get the next segment
               stSegInfo = reFindNoCase(sSegRegEx, stUrlInfo["path"], i, true);
           }

       } else {
           // set the current path
           sPath = stUrlInfo["path"];
       }

       // get the file name
       stUrlInfo["file"] = getFileFromPath(sPath);
       iFileLen = len(stUrlInfo["file"]);

       // get the directory path by removing the file name from the END of the path
       // (removing the first occurrence would break paths such as /news/2024/news)
       if( iFileLen eq 0 ){
           stUrlInfo["directory"] = sPath;
       } else if( len(sPath) gt iFileLen AND right(sPath, iFileLen) eq stUrlInfo["file"] ){
           stUrlInfo["directory"] = left(sPath, len(sPath) - iFileLen);
       } else {
           // the path is only the file name (or does not end with it)
           stUrlInfo["directory"] = replace(sPath, stUrlInfo["file"], "", "one");
       }

       // the query string in struct form
       stUrlInfo["params"]["url"] = structNew();

       // if query info was supplied, break it into a struct
       if( len(stUrlInfo["query"]) gt 0 ){
           parseUrlAddParams(
               stParams = stUrlInfo["params"]["url"],
               sPairs = stUrlInfo["query"],
               sDelim = "&"
           );
       }

       // return the struct
       return stUrlInfo;
   }

   /*
      Name:
         parseUrlAddParams(stParams,sPairs,sDelim)

      Summary:
         Helper for parseUrl(): splits a delimited list of key=value pairs and adds
         them to the given struct (the struct is updated in place).
      Arguments:
         stParams - struct - struct that receives the parameters
         sPairs - string - delimited list of key=value pairs
         sDelim - string - delimiter between the pairs ("&" or ";")
      Returns:
         Void
   */
   private void function parseUrlAddParams(required struct stParams, required string sPairs, required string sDelim){
       // put the pairs into an array for easier looping
       var aPairs = listToArray(arguments.sPairs, arguments.sDelim);
       var i = 1;
       var sKeyPair = "";
       var sKey = "";
       var sValue = "";
       var iEqPos = 0;

       for( i=1; i lte arrayLen(aPairs); i=i+1 ){
           // a pair that starts with "=" has no key in front of it; drop the leading "="
           // so that what follows is treated as a key without a value
           sKeyPair = reReplace(aPairs[i], "^=+", "");

           // nothing left to use as a key
           if( len(sKeyPair) eq 0 ) continue;

           iEqPos = find("=", sKeyPair);

           if( iEqPos gt 0 ){
               sKey = left(sKeyPair, iEqPos - 1); // current key
               // everything after the FIRST "=" is the value (the value may contain "=")
               sValue = urlDecode(removeChars(sKeyPair, 1, iEqPos)); // current value
           } else {
               sKey = sKeyPair; // current key
               sValue = ""; // set blank value
           }

           // check if key already added to struct
           if( structKeyExists(arguments.stParams, sKey) ){
               arguments.stParams[sKey] = listAppend(arguments.stParams[sKey], sValue); // add value to list
           } else {
               structInsert(arguments.stParams, sKey, sValue); // add new key/value pair
           }
       }
   }

   /*
      Name:
         deepCopy(obj)

      Summary:
         Wrapper function for the CommonSpot.deepCopy() method. This method is the same as using Server.CommonSpot.UDF.util.duplicateBean(bean).

         The passed value is handed directly to Server.CommonSpot.deepCopy() and whatever that call returns is returned to the caller unchanged.

      Note:
         As of the January 2022 releases of CommonSpot (10.8.1, 10.7.2, and 10.6.4), the implementation of clone() for CommonSpot objects has been updated to return a 'shallow' copy (more like the CFML StructCopy() method).

         This method is more robust against crashes caused by circular references and has more consistent results when the source object contains multiple references to one or more members.
      Arguments:
         obj - any - the value to copy
      Returns:
         Any
      History:
         2025-01-25 - GAC - Added
   */
   public any function deepCopy(required any obj){
      // Delegate to the server-scoped CommonSpot API; no processing is done here.
      return Server.CommonSpot.deepCopy(arguments.obj);
      // Alternate call, kept for reference (described above as equivalent):
      // return Server.CommonSpot.UDF.util.duplicateBean(bean=arguments.obj);
   }
}