
    #i                     t   d Z ddlZddlZddlmZ ddlmZmZmZm	Z	  ej                  e      ZdgZ ej                  d      Zddedeee      d	efd
Zdeded	efdZddeded	e	eeeef   f   fdZdedeeef   d	efdZded	efdZded	efdZddedeee      d	efdZddededeeeef      d	efdZy)u&  Run-Length Encoding for structured data patterns.

Detects and compresses structured repetitive patterns:
- IP address families → common prefix extraction
- File paths → $WS/ shorthand
- Enumeration lists → compact format
- Repeated section headers

Part of claw-compactor. License: MIT.
    N)Counter)DictListOptionalTuplez/home/user/workspacez(\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\btextworkspace_pathsreturnc                 v    | sy|xs t         }| }t        |t        d      D ]  }|j                  |d      } |S )z0Replace long workspace paths with $WS shorthand. T)keyreverse$WS)DEFAULT_WS_PATHSsortedlenreplace)r   r	   pathsresultwss        3/home/crogers2287/claw-compactor/scripts/lib/rle.pycompress_pathsr      sE    //EFUT2 +E*+M    workspace_pathc                 ,    | sy| j                  d|      S )z+Expand $WS back to the full workspace path.r   r   )r   )r   r   s     r   decompress_pathsr   '   s    <<~..r   min_occurrencesc                    | sdi fS t         j                  |       }|s| i fS i }|D ]K  }|j                  d      }dj                  |dd       dz   }|j	                  |g       j                  |       M i }| }d}	t        |j                         d       D ]g  \  }}
t        |
      |k  r|	dkD  rd|	 nd}|||<   t        |
      D ]/  }|j                  d      }|d   }|j                  || d|       }1 |	d	z  }	i ||fS )
zGroup IPs by common prefix and compress families.

    Returns (compressed_text, prefix_map) where prefix_map maps
    $IPn labels to the common prefix.
    Only compresses families with min_occurrences+ IPs sharing a 3-octet prefix.
    r   .N   r   c                      t        | d          S N   )r   )xs    r   <lambda>z&compress_ip_families.<locals>.<lambda>G   s    3qt9* r   )r   z$IPr#   )_IP_REfindallsplitjoin
setdefaultappendr   itemsr   setr   )r   r   ipsfamiliesippartsprefix
prefix_mapr   idxmemberslabelsuffixs                r   compress_ip_familiesr8   .   s<    2v
..
CRx &(H 3%)$s*FB'..r23 "$JF
C!(.."28LM 	w</)"Qw#cUE"
5g, 	=BHHSME1XF^^B5'6((;<F	= 	q	 :r   r3   c                     | r|s| S | }|j                         D ]E  \  }t        j                  t        j                  |      dz         }|j	                  fd|      }G |S )z1Expand compressed IP references back to full IPs.z\.(\d{1,3})c                 ,    | j                  d      z   S r"   )group)mr2   s    r   r%   z(decompress_ip_families.<locals>.<lambda>]   s    v
': r   )r,   recompileescapesub)r   r3   r   r6   patternr2   s        @r   decompress_ip_familiesrB   U   sd    zF#))+ Dv**RYYu->?:FCD Mr   c                     | syt        j                  d      }dt         j                  dt        fd}|j	                  ||       S )u   Compress comma-separated lists of ALL-CAPS short codes.

    Only compresses lists with 4+ items that are all uppercase short tokens.
    E.g. "BTC, ETH, SOL, BNB, DOGE" → "[BTC,ETH,SOL,BNB,DOGE]"
    r   z=((?:[A-Z][A-Z0-9]{1,6})(?:\s*,\s*(?:[A-Z][A-Z0-9]{1,6})){3,})r<   r
   c                     | j                  d      j                  d      D cg c]  }|j                          }}ddj                  |      z   dz   S c c}w )Nr   ,[])r;   r(   stripr)   )r<   sr,   s      r   _compactz'compress_enumerations.<locals>._compactm   sK    $%GGAJ$4$4S$9:q::SXXe_$s** ;s   A)r=   r>   Matchstrr@   )r   rA   rJ   s      r   compress_enumerationsrM   a   sE      jjYZG+BHH + + ;;x&&r   c                 :   | sy| j                  d      }i }g }d}|t        |      k  r||   }|j                  d      r|j                  d      j	                         }||v rw|dz  }|t        |      k  rc||   j                  d      sO||   j	                         r|j                  ||          |dz  }|t        |      k  r||   j                  d      sOt        |      ||<   |j                  |       |dz  }|t        |      k  rdj                  |      S )zCompress repeated identical section headers.

    When the same header text appears multiple times, keep only the first
    and merge contents.
    r   
r   #r#   )r(   r   
startswithlstriprH   r+   r)   )r   linesseen_headersr   ilineheader_texts          r   compress_repeated_headersrX   t   s    JJtE#%LF	A
c%j.Qx??3++c*002Kl*Q#e*nU1X-@-@-EQx~~'eAh/FA #e*nU1X-@-@-E ,/K[)d	Q! c%j." 99Vr   c                 V    | syt        | |      }t        |      \  }}t        |      }|S )z+Apply all RLE-style compressions to *text*.r   )r   r8   rM   )r   r	   r   _s       r   compressr[      s1    D/2F$V,IFA"6*FMr   ip_prefix_mapc                 @    | syt        | |      }|rt        ||      }|S )z#Reverse all RLE-style compressions.r   )r   rB   )r   r   r\   r   s       r   
decompressr^      s(    dN3F'>Mr   )N)   )__doc__r=   loggingcollectionsr   typingr   r   r   r   	getLogger__name__loggerr   r>   r&   rL   r   r   intr8   rB   rM   rX   r[   r^    r   r   <module>ri      sV  	 
   . .			8	$  
 
?	@ xS	/B c /3 / / /$s $S $sDQTVYQYNGZA[ $N	 	$sCx. 	S 	' ' '&C C @3 $s))<  S # htCQTH~>V be r   