#!/usr/bin/env python3
"""Smart X Engagement - Tuned for quality replies only"""
import asyncio
import json
import base64
import os
import random
import re
import sys
import urllib.request
from datetime import datetime, timezone
from pathlib import Path

os.environ.setdefault("DISPLAY", ":99")

from patchright.async_api import async_playwright

# Filesystem locations for browser state and run bookkeeping.
PROFILE_DIR = Path("/workspace/browser_profile/x_headed")    # persistent Chromium profile (keeps login)
COOKIES_PATH = Path("/workspace/scripts/x_cookies.json")     # exported X session cookies
REPLIED_USERS_PATH = Path("/workspace/replied_users.json")   # username -> last-reply timestamp (dedupe)
POST_LOG_PATH = Path("/workspace/x_posts_log.jsonl")         # append-only log of posted replies

# TUNED: More specific, sassy voice
VOICE_PROFILE = '''You're a cannabis enthusiast in your late 20s.

VIBE:
- Sassy, self-deprecating humor about weed spending
- Lowercase, minimal punctuation, gen-z twitter speak
- 0-1 emojis max (use sparingly)
- Sound like a real person, not a brand
- React to what you SEE in the image

STRICT RULES:
- If image shows flower → mention smoking/vaping/rolling
- If image shows concentrates/oil → mention dabbing
- If image shows edibles → mention eating
- If unclear → skip (do not reply)
- Never sound corporate or generic
- Keep under 120 chars
- NO transparency, excited, thrilled, great news'''


# SECURITY: prefer the GEMINI_API_KEY environment variable.  The literal
# fallback keeps existing deployments working, but this key is committed to
# source control and should be rotated and removed.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "AIzaSyBSGlZewG3RfqKJsPJr-ZR_BTJdLoxpqg")

# TUNED: Search queries that find COMMUNITY content (photos, hauls, reviews)
# NOT news/legislation. Avoid: "medical marijuana", "cannabis legislation"
SEARCH_QUERIES = [
    # General cannabis community content
    "weed haul",
    "dispensary haul",
    "cannabis haul",
    "fresh pickup weed",
    "strain review",
    "dabs haul",
    "rosin pickup",
    "live resin",
    "flower review",
    "concentrate review",

    # Patient/lifestyle content
    "med card finally",
    "first time dispensary",
    "weed stash",
    "cannabis collection",
    "smoking on",
    "dabbing on",
    "session with",
    "bowl of",

    # Community engagement
    "what you smoking",
    "what strain",
    "new drop",
    "fresh batch",
    "just picked up"
]


def load_replied_users():
    """Return {username: unix_timestamp} entries newer than 7 days.

    A missing or corrupted state file is treated as "no history" so a
    single bad write cannot wedge every future run.
    """
    if not REPLIED_USERS_PATH.exists():
        return {}
    try:
        data = json.loads(REPLIED_USERS_PATH.read_text())
    except (json.JSONDecodeError, OSError):
        # Corrupt/unreadable file: start fresh rather than crash the run.
        return {}
    cutoff = datetime.now().timestamp() - (7 * 24 * 3600)
    # Drop entries older than the 7-day dedupe window.
    return {k: v for k, v in data.items() if v > cutoff}

def save_replied_user(username):
    """Record the current time as the last reply to *username* (lowercased)."""
    history = load_replied_users()
    history[username.lower()] = datetime.now().timestamp()
    REPLIED_USERS_PATH.write_text(json.dumps(history))

def already_replied_to(username):
    """True when *username* already got a reply within the dedupe window."""
    recent = load_replied_users()
    return username.lower() in recent

def log_post(author, tweet_text, reply, tweet_url=""):
    """Append one JSON line describing a posted reply, for later review."""
    record = json.dumps({
        "timestamp": datetime.now().isoformat(),
        "author": author,
        "tweet_text": tweet_text[:100],   # truncate: log stays scannable
        "reply": reply,
        "tweet_url": tweet_url,
    })
    with open(POST_LOG_PATH, "a") as f:
        f.write(record + "\n")

def is_tweet_recent(time_text, max_hours=24):
    """Return True only for relative X timestamps within ``max_hours``.

    X renders recent tweet ages as '30s', '5m', '3h', '2d'; older tweets
    show an absolute date ('Mar 5' or 'Mar 5, 2023').  Anything not
    positively identified as recent is rejected to be safe.

    Fixes vs. the previous version:
    - seconds ('30s') were unrecognized and wrongly rejected;
    - the day check compared ``days <= 1 and max_hours <= 24``, which
      accepted a 24h-old tweet even when max_hours < 24 and rejected all
      day values when max_hours > 24; days are now converted to hours.
    """
    if not time_text:
        return False
    t = time_text.strip().lower()

    # Seconds or minutes - always inside any sane max_hours window.
    if re.match(r'^\d+[sm]$', t):
        return True

    # Hours - compare directly against the limit.
    m = re.match(r'^(\d+)h$', t)
    if m:
        return int(m.group(1)) <= max_hours

    # Days - convert to hours so any max_hours value works.
    d = re.match(r'^(\d+)d$', t)
    if d:
        return int(d.group(1)) * 24 <= max_hours

    # Everything else (years like '3y', month names, explicit 4-digit
    # years, unknown formats) is too old or unrecognized - reject.
    return False

def load_cookies():
    """Build Playwright cookie dicts for .x.com from the exported cookie file."""
    if not COOKIES_PATH.exists():
        return []
    raw = json.loads(COOKIES_PATH.read_text())
    wanted = ["auth_token", "ct0", "twid", "kdt", "guest_id", "personalization_id"]
    cookies = []
    for name in wanted:
        if name not in raw or not raw[name]:
            continue
        cookies.append({
            "name": name,
            "value": raw[name],
            "domain": ".x.com",
            "path": "/",
            "secure": True,
            # ct0 (the CSRF token) gets SameSite=Lax; everything else None.
            "sameSite": "Lax" if name == "ct0" else "None",
        })
    return cookies

# News/politics markers.  Matched with word boundaries (on alphanumeric
# keyword edges) so short words like 'law' or 'bill' no longer fire inside
# 'flawless' or 'billboard' the way plain substring checks did.
_NEWS_KEYWORDS = [
    # Politics/Legislation
    'senate', 'committee', 'bill', 'legislation', 'vote', 'law', 'regulation',
    'political', 'governor', 'senator', 'representative', 'house of representatives',
    'passed', 'approved', 'rejected', 'measure', 'amendment', 'hearing', 'session',
    'congress', 'assembly', 'capitol', 'tallahassee', 'washington',
    # News/Journalism
    'news', 'breaking', 'report', 'announced', 'according to', 'officials',
    'fox news', 'cnn', 'msnbc', 'reuters', 'associated press', 'bloomberg',
    'journalist', 'reporter', 'coverage', 'exclusive', 'sources say',
    # Legal/Government
    'court', 'lawsuit', 'attorney', 'legal', 'ruling', 'justice', 'supreme court',
    'administration', 'agency', 'department', 'bureau',
    # Medical/Research (often news)
    'study finds', 'research shows', 'scientists', 'medical study', 'clinical trial',
    # General news markers
    'update:', 'developing:', 'just in:', 'alert:', 'watch live', 'press conference'
]
# NOTE(review): 'session' and 'hearing' are very broad — 'session' in
# particular conflicts with the "session with" search query, so tweets found
# by that query will often be skipped.  Confirm this is intended.


def _keyword_pattern(kw):
    """Escape *kw*, anchoring \\b only where the edge is alphanumeric
    (a trailing \\b after 'update:' would never match)."""
    pattern = re.escape(kw)
    if kw[0].isalnum():
        pattern = r'\b' + pattern
    if kw[-1].isalnum():
        pattern = pattern + r'\b'
    return pattern


# Compiled once at import: one scan per tweet instead of ~60 substring passes.
_NEWS_RE = re.compile('|'.join(_keyword_pattern(kw) for kw in _NEWS_KEYWORDS))


def is_news_or_legislative(text):
    """Return True when *text* looks like news/legislation/politics coverage.

    Used to skip journalistic content so the bot only engages with
    community posts (hauls, reviews, sessions).
    """
    return _NEWS_RE.search(text.lower()) is not None

def is_low_quality_reply(reply):
    """Flag replies that are empty, too short (<5 chars), or generic filler."""
    if not reply or len(reply) < 5:
        return True
    banned = (
        'nice!', 'great!', 'awesome!', 'cool!', 'wow!',
        'love this', 'amazing', 'fantastic', 'excellent',
        'thanks for sharing', 'interesting',
        'been wanting to try', 'solid pickup',
    )
    lowered = reply.lower()
    for phrase in banned:
        if phrase in lowered:
            return True
    return False

async def analyze_image(tweet_text, img_b64, username, dry_run=False):
    '''Ask Gemini for a reply to a tweet+image; return None to skip.

    Sends the voice-profile prompt, the tweet text, and a base64 PNG of
    the tweet's image to the gemini-2.0-flash generateContent REST
    endpoint, then extracts, cleans, and quality-filters the reply.

    Args:
        tweet_text: visible tweet text, interpolated into the prompt.
        img_b64: base64-encoded PNG bytes of the tweet's image.
        username: tweet author's handle (prompt context and logging).
        dry_run: when True, skip the API call and return a placeholder.

    Returns:
        A reply string (5-200 chars), or None when the model said SKIP,
        the reply failed the quality gate, or the request/parse errored.
    '''
    # Prompt = persona rules + per-tweet context.  The model is instructed
    # to answer "SKIP" when it cannot clearly identify the product pictured.
    prompt = VOICE_PROFILE + f'''

Look at this tweet from @{username}:
"{tweet_text}"

Look at the IMAGE carefully. Be HONEST about what you see.

If the image clearly shows:
- Cannabis FLOWER/BUDS → mention smoking, vaping, rolling, or the strain
- Cannabis CONCENTRATES/OIL/DABS → mention dabbing  
- Cannabis EDIBLES → mention eating
- Multiple products → pick the main one

If you CANNOT clearly identify what's in the image, or if it's:
- A receipt, packaging, label
- A logo or text graphic
- Unclear/blurry
- News screenshot
- Multiple products mixed together you can't separate

Then respond with: SKIP

DO NOT GUESS. DO NOT HALLUCINATE products that aren't clearly visible.

Format your response exactly as:
REPLY: <your reply here> OR SKIP

Examples of good replies:
- my wallet did not need this today
- say less
- not them dropping fire when i am broke
- that is gonna hit different
- hows it smelling

Examples of BAD replies (never do these):
- nice!
- solid pickup
- been wanting to try that
- great product
- love this
- Mentioning specific strains/products you cannot see

REPLY:'''
    
    if dry_run:
        print(f"  [DRY RUN] Would analyze image for @{username}")
        return "[dry run reply]"
    
    try:
        payload = json.dumps({
            "contents": [{"parts": [
                {"text": prompt},
                {"inline_data": {"mime_type": "image/png", "data": img_b64}}
            ]}],
            "generationConfig": {"temperature": 0.3, "maxOutputTokens": 80}  # Lower temp = less creative/more accurate
        }).encode()
        
        # NOTE(review): the API key travels in the URL query string — valid
        # for this endpoint, but it will appear in any proxy/access logs.
        req = urllib.request.Request(
            f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}",
            data=payload, headers={"Content-Type": "application/json"})
        
        with urllib.request.urlopen(req, timeout=30) as resp:
            # KeyError/IndexError here (e.g. no candidates when the prompt is
            # safety-blocked) is caught by the broad except below.
            r = json.loads(resp.read().decode())["candidates"][0]["content"]["parts"][0]["text"]
            
            # Extract reply: take everything after the last prompt marker,
            # or the whole response if the model ignored the format.
            reply = None
            if "REPLY:" in r:
                reply = r.split("REPLY:", 1)[1].strip()
            else:
                reply = r.strip()
            
            # Strip stray surrounding quotes the model sometimes adds.
            reply = reply.strip('"\'').strip()
            
            # Skip check
            # NOTE(review): the substring test also rejects any genuine reply
            # containing "skip" — presumably an intentional bias toward
            # skipping; confirm that is desired.
            if reply.upper() == "SKIP" or "skip" in reply.lower():
                print(f"  ⏭️  Gemini said SKIP", flush=True)
                return None
            
            # Validate quality (generic-filler filter, same gate as the caller).
            if is_low_quality_reply(reply):
                print(f"  ⏭️  Low quality reply rejected: {reply}", flush=True)
                return None
            
            # Hard cap well above the prompt's 120-char target.
            if len(reply) > 200:
                reply = reply[:197] + "..."
            
            return reply if len(reply) >= 5 else None
            
    except Exception as e:
        # Best-effort: any network, HTTP, or parse failure just means "no reply".
        print(f"  Gemini err: {e}", flush=True)
    return None

async def run(reply_count=1, like_count=3, dry_run=False):
    """Run one engagement pass: search X, like recent tweets, post one reply.

    Flow: open the persistent logged-in browser profile, run one random
    community search (latest-first), then for up to 10 result tweets:
    verify recency (both the relative label and the <time datetime> attr),
    skip news/politics and already-replied authors, like, and — for tweets
    with an image — generate a Gemini reply and post it.

    Args:
        reply_count: maximum replies to post this run.
        like_count: maximum likes this run.
        dry_run: log intended actions without liking/posting changes
                 (likes are still performed; only the reply is suppressed).

    Returns:
        Stats dict: likes, replies, checked, skipped_news, skipped_no_image.
    """
    print(f"🧠 Smart Engagement {'[DRY RUN]' if dry_run else ''}", flush=True)
    stats = {"likes": 0, "replies": 0, "checked": 0, "skipped_news": 0, "skipped_no_image": 0}
    PROFILE_DIR.mkdir(parents=True, exist_ok=True)
    
    async with async_playwright() as p:
        # Persistent context keeps login state between runs; headed mode is
        # used (DISPLAY is set to :99 at module import, presumably an Xvfb).
        ctx = await p.chromium.launch_persistent_context(
            str(PROFILE_DIR), headless=False, 
            viewport={"width": 1366, "height": 768},
            args=["--no-sandbox", "--disable-dev-shm-usage"])
        
        await ctx.add_cookies(load_cookies())
        page = ctx.pages[0] if ctx.pages else await ctx.new_page()
        
        # Sanity-check the session: a redirect to a login URL means the
        # cookies/profile are stale, so bail out with zeroed stats.
        await page.goto("https://x.com/home", wait_until="domcontentloaded")
        await asyncio.sleep(3)
        if "login" in page.url.lower():
            print("❌ Not logged in", flush=True)
            await ctx.close()
            return stats
        
        # One random community-focused query per run; f=live sorts by latest.
        q = random.choice(SEARCH_QUERIES)
        print(f"🔍 {q}", flush=True)
        await page.goto(f"https://x.com/search?q={q.replace(' ','%20')}&f=live", wait_until="domcontentloaded")
        await asyncio.sleep(4)
        # Small scroll to trigger lazy-loading of more results.
        await page.mouse.wheel(0, 400)
        await asyncio.sleep(2)
        
        tweets = await page.locator('[data-testid="tweet"]').all()
        print(f"📄 Found {len(tweets)} tweets to check", flush=True)
        
        for tweet in tweets[:10]:
            # Stop once BOTH quotas are met.
            if stats["replies"] >= reply_count and stats["likes"] >= like_count: 
                break
            
            stats["checked"] += 1
            
            try:
                time_el = tweet.locator('time').first
                time_text = ""
                if await time_el.count():
                    time_text = await time_el.inner_text()
                    # Also get the datetime attribute for verification
                    datetime_attr = await time_el.get_attribute('datetime') or ""
                    print(f"  ⏰ Time: '{time_text}' | attr: '{datetime_attr[:25]}'", flush=True)
                    
                    # STRICT: Reject if datetime attr shows old date
                    if datetime_attr:
                        try:
                            # NOTE(review): redundant — timezone is already
                            # imported at module top (L11); harmless shadow.
                            from datetime import timezone
                            tweet_date = datetime.fromisoformat(datetime_attr.replace('Z', '+00:00'))
                            hours_old = (datetime.now(timezone.utc) - tweet_date).total_seconds() / 3600
                            if hours_old > 24:
                                print(f"  ⏭️  Tweet is {hours_old:.1f}h old - REJECT", flush=True)
                                continue
                        except Exception as e:
                            # Unparseable attr: fall through to the text check.
                            print(f"  ⚠️  datetime parse err: {e}", flush=True)
                    
                    # Second gate: the human-readable label ("3h", "2d", ...).
                    if not is_tweet_recent(time_text): 
                        print(f"  ⏭️  Time text '{time_text}' too old", flush=True)
                        continue
                else:
                    print(f"  ⏭️  No time element", flush=True)
                    continue
                
                txt = await tweet.locator('[data-testid="tweetText"]').first.inner_text() if await tweet.locator('[data-testid="tweetText"]').count() else ""
                
                # Author handle from the profile link href (last path segment).
                author = "unknown"
                try:
                    a = tweet.locator('[data-testid="User-Name"] a[role="link"]').first
                    if await a.count(): 
                        href = await a.get_attribute("href")
                        author = href.strip("/").split("/")[-1] if href else "unknown"
                except: 
                    pass
                
                print(f"\n👤 @{author}: {txt[:40]}... ({time_text})", flush=True)
                
                # Skip conditions
                if is_news_or_legislative(txt):
                    print(f"  ⏭️  NEWS/POLITICS - skipping", flush=True)
                    stats["skipped_news"] += 1
                    continue
                
                if already_replied_to(author): 
                    print(f"  ⏭️  Already replied", flush=True)
                    continue
                
                # Like (always safe)
                # NOTE(review): likes are NOT suppressed in dry_run mode.
                if stats["likes"] < like_count:
                    btn = tweet.locator('[data-testid="like"]').first
                    if await btn.is_visible():
                        try: 
                            await btn.click(timeout=3000)
                            stats["likes"] += 1
                            print(f"  ❤️  Liked @{author}", flush=True)
                            await asyncio.sleep(0.5)
                        except: 
                            # Best-effort: a failed like click is ignored.
                            pass
                
                # Reply (only with image + quality check)
                if stats["replies"] < reply_count:
                    imgs = await tweet.locator('[data-testid="tweetPhoto"] img').all()
                    
                    if not imgs:
                        print(f"  ⏭️  No image - skipping reply", flush=True)
                        stats["skipped_no_image"] += 1
                        continue
                    
                    # Analyze image: screenshot the first photo element and
                    # hand it to Gemini as base64 PNG.
                    reply = None
                    try:
                        img_data = base64.b64encode(await imgs[0].screenshot()).decode()
                        reply = await analyze_image(txt, img_data, author, dry_run)
                    except Exception as e:
                        print(f"  Image analysis failed: {e}", flush=True)
                        continue
                    
                    if not reply:
                        print(f"  ⏭️  No quality reply generated", flush=True)
                        continue
                    
                    # Defense-in-depth: analyze_image already filters, but
                    # re-check here in case it returned a raw model string.
                    if is_low_quality_reply(reply):
                        print(f"  ⏭️  Reply failed quality check: {reply}", flush=True)
                        continue
                    
                    print(f"  📝 {reply}", flush=True)
                    
                    if dry_run:
                        print(f"  [DRY RUN] Would post reply", flush=True)
                        stats["replies"] += 1
                        save_replied_user(author)
                        continue
                    
                    # Post reply: open the tweet's detail page (click the
                    # timestamp's parent link), then use the "r" hotkey to
                    # open the reply composer.
                    await tweet.locator('time').locator('..').first.click(timeout=3000)
                    await asyncio.sleep(2)
                    await page.keyboard.press("r")
                    await asyncio.sleep(2)
                    
                    ta = page.locator('[data-testid="tweetTextarea_0"]').first
                    if await ta.is_visible(timeout=3000):
                        await ta.fill("")
                        # Human-ish typing cadence, then Ctrl+Enter submits.
                        await ta.type(reply, delay=random.randint(40, 80))
                        await asyncio.sleep(1)
                        await page.keyboard.press("Control+Enter")
                        await asyncio.sleep(3)
                        
                        stats["replies"] += 1
                        save_replied_user(author)
                        log_post(author, txt, reply)
                        print("  ✅ Posted!", flush=True)
                    
                    await page.go_back()
                    await asyncio.sleep(2)
                    # NOTE(review): this break exits the tweet loop after the
                    # first reply attempt even if the like quota is unmet —
                    # confirm that trade-off is intended.
                    break
                    
            except Exception as e: 
                # Per-tweet catch-all: one bad tweet never aborts the pass.
                print(f"  err: {str(e)[:50]}", flush=True)
        
        print(f"\n📊 Checked:{stats['checked']} L:{stats['likes']} R:{stats['replies']}", flush=True)
        print(f"   Skipped: News:{stats['skipped_news']} NoImage:{stats['skipped_no_image']}", flush=True)
        await ctx.close()
    return stats

if __name__ == "__main__":
    # Defaults: one reply, three likes per run.
    reply_target, like_target = 1, 3
    dry = "--dry-run" in sys.argv

    # Minimal flag parsing: "--replies N" / "--likes N" anywhere on the
    # command line; unknown arguments are ignored.
    argv = sys.argv
    for idx, flag in enumerate(argv):
        has_value = idx + 1 < len(argv)
        if flag == "--replies" and has_value:
            reply_target = int(argv[idx + 1])
        elif flag == "--likes" and has_value:
            like_target = int(argv[idx + 1])

    asyncio.run(run(reply_target, like_target, dry_run=dry))
