
    #i                        d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ej                  j                  d e ee      j                         j                                ddlmZmZmZmZmZmZ ddlmZ ddlmZmZ  ej:                  e      ZdZ d	ed
e	e   fdZ!de	e   d
e	e   fdZ"	 	 dd	edede#de#d
eee
f   f
dZ$	 dd	edede%d
eee
f   fdZ&	 dd	edede%d
eee
f   fdZ'd	eded
eee
f   fdZ(d Z)edk(  r e)        yy)a.  Dictionary-based compression for workspace memory files.

Learns high-frequency phrases from the workspace, builds a codebook,
and applies lossless substitution compression.

Usage:
    python3 dictionary_compress.py <workspace> --build       # Scan + generate codebook
    python3 dictionary_compress.py <workspace> --compress     # Apply codebook
    python3 dictionary_compress.py <workspace> --decompress   # Expand codes back
    python3 dictionary_compress.py <workspace> --stats        # Show compression effect

Part of claw-compactor. License: MIT.
    N)Path)DictListAny)build_codebookcompress_textdecompress_textsave_codebookload_codebookcompression_stats)estimate_tokens)FileNotFoundError_MemCompressErrorzmemory/.codebook.json	workspacereturnc                 (   g }dD ])  }| |z  }|j                         s|j                  |       + | dz  }|j                         rLt        |j	                  d            D ]/  }|j
                  j                  d      r|j                  |       1 |S )z(Collect all markdown files in workspace.)z	MEMORY.mdzTOOLS.mdz	AGENTS.mdzSOUL.mdzUSER.mdmemoryz*.md.)existsappendis_dirsortedglobname
startswith)r   filesr   pmem_dirfs         ?/home/crogers2287/claw-compactor/scripts/dictionary_compress.py_collect_md_filesr!   $   s    EL 88:LLO ("G~~V,- 	 A66$$S)Q	  L    r   c                 N    | D cg c]  }|j                  dd       c}S c c}w )z&Read all files into a list of strings.utf-8replaceencodingerrors)	read_text)r   r   s     r    _read_textsr*   3   s#    EJKAKKK;KKKs   "codebook_pathmin_freqmax_entriesc                     t        |       }t        |      }t        |||      }t        ||       t	        |      t        |      t	        |      dS )z"Scan workspace and build codebook.)r,   r-   )codebook_entriesr+   files_scanned)r!   r*   r   r
   lenstr)r   r+   r,   r-   r   textscbs          r    	cmd_buildr5   8   sP     i(EE	k	JB"m$G]+U r"   dry_runc                    t        |      }t        |       }d}d}|D ]W  }|j                  dd      }t        |      }	t	        ||      }
t        |
      }||	z  }||z  }|rE|j                  |
d       Y t        |      ||||z
  |dS )z2Apply codebook compression to all workspace files.r   r$   r%   r&   r'   )r   tokens_beforetokens_aftertokens_savedr6   )r   r!   r)   r   r   
write_textr1   )r   r+   r6   r4   r   total_beforetotal_afterr   textbefore
compressedafters               r    cmd_compressrC   J   s     
}	%Bi(ELK 7{{GI{> &"4,

+uLLgL67 U%#${2 r"   c                     t        |      }t        |       }|D ]7  }|j                  dd      }t        ||      }|r%|j	                  |d       9 t        |      |dS )z/Expand codebook codes back to original phrases.r$   r%   r&   r8   )r   r6   )r   r!   r)   r	   r<   r1   )r   r+   r6   r4   r   r   r?   decompresseds           r    cmd_decompressrF   h   si     
}	%Bi(E 9{{GI{>&tR0LLL8	9 ZG44r"   c                     t        |      }t        |       }t        |      }dj                  |      }t	        ||      }t        |||      }t        |      |d<   |S )zShow compression statistics.
r   )r   r!   r*   joinr   r   r1   )r   r+   r4   r   r3   combinedrA   statss           r    	cmd_statsrL   z   s\    
 
}	%Bi(EEyyHx,Jh
B7EZE'NLr"   c                     t        j                  d      } | j                  dd       | j                  d      }|j                  dd	d
       |j                  dd	d       |j                  dd	d       |j                  dd	d       | j                  dd d       | j                  dd	       | j                  dd	       | j	                         }t        |j                        }|j                  rt        |j                        n|t        z  }|j                  rt        ||      }nV|j                  rt        |||j                        }n1|j                  rt        |||j                        }nt!        ||      }|j"                  r!t%        t#        j&                  |d             y |j)                         D ]  \  }}t%        | d|         y )NzDictionary-based compression)descriptionr   zWorkspace directory)helpT)requiredz--build
store_truezBuild codebook)actionrO   z
--compresszApply compressionz--decompresszExpand codesz--statsz
Show statsz
--codebookzCodebook path)defaultrO   z	--dry-run)rR   z--json)r6      )indentz: )argparseArgumentParseradd_argumentadd_mutually_exclusive_group
parse_argsr   r   codebookDEFAULT_CODEBOOK_PATHbuildr5   compressrC   r6   
decompressrF   rL   jsonprintdumpsitems)parsergroupargswscb_pathresultkvs           r    mainrl      s   $$1OPF
*?@///>E	y<LM	|L?RS	~lP	yLI
dI
L9
6D	dnn	B%)]]d4==!=R8RGzz2w'	b'4<<@	GT\\B2w'yydjj*+LLN 	DAqQCr!+	r"   __main__)      )F)*__doc__rV   r`   loggingsyspathlibr   typingr   r   r   pathinsertr2   __file__resolveparentlib.dictionaryr   r   r	   r
   r   r   
lib.tokensr   lib.exceptionsr   r   	getLogger__name__loggerr\   r!   r*   intr5   boolrC   rF   rL   rl    r"   r    <module>r      s      
  " " 3tH~--/667 8  ' ?			8	$/  $t* LtDz Ld3i L 	  	
 
#s(^*   
#s(^	B 555 5 
#s(^	5$ 
#s(^@ zF r"   