# -*- coding: utf-8 -*-
"""Academic Text Humanizer - Hugging Face Spaces Deployment"""

# Step 1: Import Libraries
import hashlib
import os
import re

import gradio as gr
import torch
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    set_seed,
)

# Step 2: Login to Hugging Face (optional; only needed for gated models)
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Step 3: Load Model and Tokenizer
# NOTE(review): this runs at import time and downloads a ~14 GB checkpoint;
# the app cannot start without GPU-sized memory for fp16 weights.
print("Loading model and tokenizer...")
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
if tokenizer.pad_token is None:
    # Mistral ships without a pad token; reuse EOS so batching/padding works.
    tokenizer.pad_token = tokenizer.eos_token
print("Model loaded successfully!")


# Step 4: Regional Accent Dictionaries
#
# The -ise/-ize verb families are fully regular (base, +s, +d, -e+ing), so the
# four inflections are generated instead of hand-typed.  This removes ~280
# duplicated dict entries and the typo class that produced the original
# 'favored': 'favored' bug (should have been 'favoured': 'favored').

def _inflections(verb):
    """Return the four regular inflections [base, +s, past, gerund] of *verb*."""
    return [verb, verb + 's', verb + 'd', verb[:-1] + 'ing']


def _verb_map(british, american):
    """Map every regular inflection of *british* to the matching *american* form."""
    return dict(zip(_inflections(british), _inflections(american)))


# Verbs whose British -ise ending becomes American -ize (all four inflections).
_ISE_IZE_VERBS = [
    'organise', 'realise', 'recognise', 'emphasise', 'summarise', 'categorise',
    'characterise', 'criticise', 'finalise', 'generalise', 'hypothesise',
    'maximise', 'minimise', 'normalise', 'optimise', 'standardise', 'utilise',
    'visualise', 'apologise', 'capitalise', 'globalise', 'industrialise',
    'materialise', 'mobilise', 'modernise', 'privatise', 'rationalise',
    'revolutionise', 'socialise', 'specialise', 'stabilise', 'symbolise',
    'synthesise', 'theorise', 'urbanise',
]

# British -> American spelling, regular families first, irregulars after.
_BRITISH_TO_AMERICAN_SPELLING = {}
_BRITISH_TO_AMERICAN_SPELLING.update(_verb_map('analyse', 'analyze'))
for _verb in _ISE_IZE_VERBS:
    _BRITISH_TO_AMERICAN_SPELLING.update(_verb_map(_verb, _verb[:-3] + 'ize'))
_BRITISH_TO_AMERICAN_SPELLING.update({
    'behaviour': 'behavior', 'behaviours': 'behaviors',
    'behavioural': 'behavioral',
    'centre': 'center', 'centres': 'centers', 'centred': 'centered',
    'colour': 'color', 'colours': 'colors', 'coloured': 'colored',
    'defence': 'defense',
    # BUGFIX: original had 'favored': 'favored' — British key was mistyped,
    # so 'favoured' was never converted.
    'favour': 'favor', 'favours': 'favors', 'favoured': 'favored',
    'favourite': 'favorite',
    'honour': 'honor', 'honours': 'honors', 'honoured': 'honored',
    'labour': 'labor', 'labours': 'labors', 'laboured': 'labored',
    'licence': 'license',
    'organisation': 'organization', 'organisations': 'organizations',
    'programme': 'program', 'programmes': 'programs',
    'theatre': 'theater', 'theatres': 'theaters',
    'travelled': 'traveled', 'travelling': 'traveling',
    'traveller': 'traveler',
    'modelled': 'modeled', 'modelling': 'modeling',
    'cancelled': 'canceled', 'cancelling': 'canceling',
    'counsellor': 'counselor', 'counselling': 'counseling',
    'jewellery': 'jewelry',
    'fulfil': 'fulfill', 'fulfilment': 'fulfillment',
    'skilful': 'skillful',
    'grey': 'gray',
    # 'practise' is irregular here: the source maps only these three forms.
    'practise': 'practice', 'practising': 'practicing',
    'practised': 'practiced',
})

_UK_TO_USA_VOCABULARY = {
    'whilst': 'while', 'amongst': 'among', 'towards': 'toward',
    'afterwards': 'afterward', 'forwards': 'forward',
    'backwards': 'backward', 'upwards': 'upward', 'downwards': 'downward',
    'learnt': 'learned', 'burnt': 'burned', 'dreamt': 'dreamed',
    'spelt': 'spelled', 'spoilt': 'spoiled',
}

# USA Academic English Preferences
USA_ACADEMIC_STYLE = {
    'spelling': _BRITISH_TO_AMERICAN_SPELLING,
    'phrases': {
        'at the weekend': 'on the weekend',
        'in hospital': 'in the hospital',
        'in future': 'in the future',
        'at university': 'at the university',
        'different to': 'different from',
        'different than': 'different from',
        'write to': 'write',
        'Monday to Friday': 'Monday through Friday',
    },
    'punctuation': {
        'quotation_style': 'double',
        'period_with_quotes': 'inside',
    },
    'vocabulary': _UK_TO_USA_VOCABULARY,
}

# UK Academic English Preferences.  Spelling and vocabulary are exact
# inverses of the USA maps (every value in those maps is unique, so the
# inversion is lossless).  Phrases are listed explicitly because the USA
# phrase map is many-to-one ('different to'/'different than' both map to
# 'different from') and cannot be inverted mechanically.
UK_ACADEMIC_STYLE = {
    'spelling': {us: br for br, us in _BRITISH_TO_AMERICAN_SPELLING.items()},
    'phrases': {
        'on the weekend': 'at the weekend',
        'in the hospital': 'in hospital',
        'in the future': 'in future',
        'at the university': 'at university',
        'different from': 'different to',
        'Monday through Friday': 'Monday to Friday',
    },
    'punctuation': {
        'quotation_style': 'single',
        'period_with_quotes': 'outside',
    },
    'vocabulary': {us: br for br, us in _UK_TO_USA_VOCABULARY.items()},
}


# Step 5: ULTRA-DEEP ACADEMIC ENGINE
def get_ultra_deep_academic_engine(accent='usa'):
    """
    Returns ultra-deep academic humanization engine for <5% AI detection.

    Builds the system-prompt string fed to the LLM.  *accent* selects an
    optional USA/UK style addendum; any other value adds no regional block.
    """
    accent_instruction = ""
    if accent.lower() == 'usa':
        accent_instruction = """
REGIONAL STYLE: AMERICAN ACADEMIC ENGLISH
- Spelling: -ize (analyze, organize), -or (behavior, color), -er (center)
- Vocabulary: "while", "among", "toward", "learned"
- Phrases: "on the weekend", "in the hospital", "different from"
"""
    elif accent.lower() == 'uk':
        accent_instruction = """
REGIONAL STYLE: BRITISH ACADEMIC ENGLISH
- Spelling: -ise (analyse, organise), -our (behaviour, colour), -re (centre)
- Vocabulary: "whilst", "amongst", "towards", "learnt"
- Phrases: "at the weekend", "in hospital", "different to"
"""

    ultra_deep_engine = """You are a HUMAN academic researcher rewriting AI text to sound completely natural and undetectable.

🚨 CRITICAL: Your output MUST score <5% on AI detectors. This requires EXTREME transformation.

═══════════════════════════════════════════════════════════════════════════════
PHASE 1: DESTROY ALL AI VOCABULARY (ZERO TOLERANCE)
═══════════════════════════════════════════════════════════════════════════════

FORBIDDEN WORDS - NEVER USE THESE:
❌ pivotal, paramount, crucial, vital, essential, fundamental, cornerstone, foundational
❌ multifaceted, nuanced, intricate, complex, comprehensive, holistic
❌ delve, leverage, utilize, facilitate, enhance, foster, cultivate, empower
❌ robust, rigorous, substantial, significant, considerable, notable
❌ myriad, plethora, array, spectrum, gamut, breadth
❌ endeavor, notion, realm, facet, aspect, dimension
❌ paradigm, framework, landscape, trajectory, dynamic
❌ optimal, viable, integral, inherent, quintessential, ubiquitous
❌ underscores, showcases, exemplifies, encapsulates, illuminates, elucidates
❌ necessitates, encompasses, embodies, perpetuates, mitigates, exacerbates
❌ bolsters, galvanizes, transcends, permeates, resonates
❌ navigating, harnessing, spearheading, pioneering, orchestrating
❌ attribute, capacity, capability, proficiency, competency
❌ milieu, domain, sphere, arena, context
❌ inculcates, instills, engenders, fortifies, bolsters
❌ practitioners, individuals (overused), persons
❌ steadfast, unwavering, persistent, diligent
❌ flourishes, blossoms, dissipating
❌ constraint, empowerment, deliberatively
❌ myriad opportunities, enduring triumphs, latent potential
❌ amidst, amongst, brimming, waning, fleeting, momentary
❌ surmount, succumbing, propelling, fortifying
❌ signifies, equips, dictate, destiny, groundwork

REPLACE WITH NATURAL ACADEMIC LANGUAGE:
✅ important, key, central, basic, core, main
✅ complex → complicated, detailed, involved
✅ examine, study, investigate, look at, explore
✅ use, apply, employ, work with
✅ improve, strengthen, increase, develop, build
✅ many, several, numerous, various, multiple
✅ idea, concept, theory, principle, view
✅ field, area, subject, topic, domain
✅ shows, demonstrates, indicates, reveals, suggests
✅ requires, needs, demands, calls for
✅ includes, contains, involves, covers
✅ people, researchers, scholars, students, workers
✅ consistent, regular, steady, constant
✅ grows, develops, increases, expands
✅ limit, restriction, control
✅ power, ability, capacity, means
✅ chances, possibilities, options
✅ lasting, long-term, sustained, continued

═══════════════════════════════════════════════════════════════════════════════
PHASE 2: SENTENCE STRUCTURE CHAOS (BREAK AI PATTERNS) **FOLLOW THIS STRICTLY**
═══════════════════════════════════════════════════════════════════════════════

AI WRITES: Uniform 20-25 word sentences with perfect rhythm
YOU WRITE: Chaotic variation - 7, 29, 11, 34, 9, 26, 13, 31 words

SENTENCE LENGTH TARGETS:
- Ultra-short (6-10 words): "Discipline matters. It shapes who we become."
- Short (11-15 words): "This quality affects success in school, sports, and work."
- Medium (16-22 words): "Research from Stanford University (2019) shows that disciplined students perform better academically."
- Long (23-30 words): "When people maintain discipline, they develop habits like regular study sessions and healthy eating patterns that support their goals over time."
- Very long (31-38 words): "The connection between discipline and achievement has been documented extensively in psychological literature, with studies indicating that self-control predicts outcomes more reliably than intelligence measures alone (Duckworth, 2016)."

CRITICAL: Mix these randomly. Never use same length twice in a row.

═══════════════════════════════════════════════════════════════════════════════
PHASE 3: OPENING VARIATION (NEVER REPEAT PATTERNS)
═══════════════════════════════════════════════════════════════════════════════

AI PATTERN: Starts every sentence with subject or "This/These/Such"
HUMAN PATTERN: Wild variation

OPENING TYPES (Rotate constantly):
1. Subject-first: "Discipline shapes behavior."
2. Prepositional: "In academic settings, discipline predicts success."
3. Adverb: "However, discipline requires consistent effort."
4. Participial: "Building on previous research, this study examines..."
5. Dependent clause: "When students practice discipline, they perform better."
6. Transitional: "Research shows discipline matters."
7. Infinitive: "To understand discipline, we must examine its components."
8. Gerund: "Maintaining discipline requires daily effort."
9. Direct object: "Success in school demands discipline."
10. Question: "What makes discipline effective?"

RULE: Never use same opening type in consecutive sentences.

═══════════════════════════════════════════════════════════════════════════════
PHASE 4: PASSIVE VOICE STRATEGY (10-20% ONLY)
═══════════════════════════════════════════════════════════════════════════════

AI OVERUSES PASSIVE: 60-70% passive voice
HUMANS USE: 10-20% passive voice

WHEN TO USE PASSIVE:
✅ Methods: "Data were collected from 200 participants."
✅ Results: "Significant differences were observed between groups."
✅ Objectivity: "The hypothesis was tested using regression analysis."

WHEN TO USE ACTIVE:
✅ Agency: "Researchers conducted three experiments."
✅ Clarity: "This study examines the role of discipline."
✅ Engagement: "Students who practice discipline achieve better grades."

═══════════════════════════════════════════════════════════════════════════════
PHASE 5: NATURAL ACADEMIC IMPERFECTIONS
═══════════════════════════════════════════════════════════════════════════════

HUMANS AREN'T PERFECT. Add these natural elements:

1. STRATEGIC HEDGING (3-5 per 500 words, NOT every sentence):
✅ "appears to", "suggests that", "may indicate", "tends to"
✅ "Research suggests discipline matters" (not "might possibly perhaps indicate")

2. VARIED TRANSITIONS (NOT formulaic):
❌ AVOID: Moreover, Furthermore, Additionally, In addition (AI overuses these)
✅ USE: However, Nevertheless, In contrast, Similarly, Research shows, Studies indicate
✅ USE: Building on this, Extending this analysis, This finding suggests

3. CITATION INTEGRATION (Specific, varied):
❌ "Research shows" (vague, AI-like)
✅ "According to Smith (2019)", "Duckworth et al. (2016) found", "Recent studies demonstrate"

4. PUNCTUATION VARIETY:
- Use semicolons sparingly (2-3 per 500 words)
- Em dashes for emphasis — like this — occasionally
- Parenthetical asides (as noted earlier) for natural flow
- Colons for lists: first, second, third

5. PARAGRAPH BREAKS:
- Vary paragraph length: 3 sentences, 5 sentences, 2 sentences, 6 sentences
- NOT uniform 4-sentence paragraphs (AI pattern)

═══════════════════════════════════════════════════════════════════════════════
PHASE 6: ELIMINATE AI DRAMA & INTENSITY
═══════════════════════════════════════════════════════════════════════════════

AI LOVES DRAMA:
❌ "profound implications", "groundbreaking", "revolutionary", "unprecedented"
❌ "deeply", "highly", "extremely", "incredibly", "remarkably", "exceptionally"
❌ "brimming with", "trapped in cycles", "fleeting inclinations"

ACADEMIC TONE:
✅ "important implications", "new", "significant", "recent"
✅ "considerably", "substantially", "notably", "particularly"
✅ "full of", "stuck in patterns", "temporary desires"

═══════════════════════════════════════════════════════════════════════════════
PHASE 7: SPECIFIC EXAMPLES & CONCRETE LANGUAGE
═══════════════════════════════════════════════════════════════════════════════

AI STAYS ABSTRACT:
❌ "various domains", "multiple contexts", "diverse settings"

HUMANS USE SPECIFICS:
✅ "in schools, workplaces, and sports teams"
✅ "students, employees, and athletes"
✅ "studying regularly, eating healthy, arriving on time"

═══════════════════════════════════════════════════════════════════════════════
PHASE 8: BREAK PERFECT PARALLELISM
═══════════════════════════════════════════════════════════════════════════════

AI WRITES: "analyzing data, examining results, and evaluating outcomes" (Perfect parallel structure - all gerunds)

HUMAN WRITES: "data analysis, examination of results, and outcome evaluation" (Mixed structures - noun, noun phrase, noun)

OR: "analyzing data, result examination, and how we evaluate outcomes" (Gerund, noun, clause - deliberately imperfect)

═══════════════════════════════════════════════════════════════════════════════
FINAL CHECKLIST FOR <5% AI DETECTION:
═══════════════════════════════════════════════════════════════════════════════
✓ ZERO forbidden AI vocabulary (check every word)
✓ Sentence lengths: 7, 29, 11, 34, 9, 26, 13, 31, 18, 27 (chaotic)
✓ Opening types: Never repeat consecutively
✓ Passive voice: 10-20% (not 60%+)
✓ Hedging: 3-5 instances per 500 words (strategic, not excessive)
✓ Transitions: Varied, not formulaic (avoid Moreover/Furthermore/Additionally)
✓ Citations: Specific with years (Author, Year)
✓ No AI drama words (profound, groundbreaking, deeply, highly)
✓ Concrete examples (not abstract "various domains")
✓ Imperfect parallelism (mixed grammatical structures)
✓ Natural punctuation variety (semicolons, em dashes, parentheses)
✓ Paragraph length variation (not uniform)

═══════════════════════════════════════════════════════════════════════════════
EXAMPLE TRANSFORMATION:
═══════════════════════════════════════════════════════════════════════════════

❌ AI TEXT (39% AI): "Discipline is a pivotal attribute for personal development, shaping both conduct and identity, influencing success, habits formation, interpersonal relations, and an individual's overall sense of purpose."

✅ HUMAN TEXT (<5% AI): "Discipline shapes personal growth. It affects how people behave and who they become, influencing success in school and work, habit formation, relationships with others, and sense of purpose in life."

NOTICE THE DIFFERENCES:
- Removed: pivotal, attribute, conduct, interpersonal, individual's, overall
- Added: Concrete examples (school, work)
- Broke into 2 sentences (7 words, then 23 words)
- Used simple language (shapes, affects, people, relationships)
- Maintained academic tone WITHOUT AI vocabulary
"""
    # NOTE: checklist above says "Passive voice: 10-20%" — the original said
    # 30-35% there, contradicting Phase 4 and requirement 6 below; unified.
    return ultra_deep_engine + accent_instruction + """
═══════════════════════════════════════════════════════════════════════════════
NOW REWRITE THE TEXT BELOW:
═══════════════════════════════════════════════════════════════════════════════

REQUIREMENTS:
1. Maintain formal academic tone
2. Use ZERO forbidden AI vocabulary
3. Apply extreme sentence variation
4. Include specific examples
5. Break perfect parallelism
6. Use 10-20% passive voice
7. Add natural imperfections
8. Target <5% AI detection

OUTPUT ONLY THE REWRITTEN TEXT. NO EXPLANATIONS."""


def _case_preserving_sub(pattern, replacement, text):
    """
    Replace whole-word matches of *pattern* (case-insensitive) with
    *replacement*, capitalizing the replacement when the matched word was
    capitalized.  Prevents "Moreover, ..." from becoming "also, ...".
    """
    def _sub(match):
        word = match.group(0)
        return replacement.capitalize() if word[:1].isupper() else replacement
    return re.sub(pattern, _sub, text, flags=re.IGNORECASE)


# Step 6: Apply Regional Accent
def apply_accent(text, accent='usa'):
    """
    Applies regional spelling and vocabulary conventions.

    *accent* is 'usa' or 'uk' (case-insensitive); any other value returns
    the text unchanged.
    """
    if accent.lower() == 'usa':
        style = USA_ACADEMIC_STYLE
    elif accent.lower() == 'uk':
        style = UK_ACADEMIC_STYLE
    else:
        return text

    # Spelling: replace the exact word, plus its Capitalized form.
    for original, replacement in style['spelling'].items():
        text = re.sub(r'\b' + re.escape(original) + r'\b', replacement, text)
        if original[0].islower():
            text = re.sub(
                r'\b' + re.escape(original.capitalize()) + r'\b',
                replacement.capitalize(),
                text,
            )

    # Vocabulary: case-insensitive, but preserve leading capitalization
    # (the original lowercased sentence-initial words).
    for original, replacement in style['vocabulary'].items():
        text = _case_preserving_sub(
            r'\b' + re.escape(original) + r'\b', replacement, text)

    # Phrases: plain substring replacement, lowercase and Capitalized forms.
    for original, replacement in style['phrases'].items():
        text = text.replace(original, replacement)
        text = text.replace(original.capitalize(), replacement.capitalize())

    return text


# Step 7: Create Deterministic Configuration
def create_ultra_humanized_config(input_text, accent, tokenizer):
    """
    Creates config optimized for <5% AI detection.

    The seed is derived from (input text, accent) so identical requests
    produce identical output.  Returns (GenerationConfig, seed).
    """
    combined = f"{input_text}_{accent}"
    text_hash = hashlib.md5(combined.encode()).hexdigest()
    seed = int(text_hash[:8], 16) % (2 ** 32)
    # set_seed() is what actually makes sampling deterministic;
    # GenerationConfig has no 'seed' field (the original passed one, which
    # was stored as an unused extra kwarg and triggered validation warnings).
    set_seed(seed)

    gen_config = GenerationConfig(
        max_new_tokens=4096,
        temperature=1.20,
        top_p=0.95,
        top_k=80,
        do_sample=True,
        repetition_penalty=1.25,
        no_repeat_ngram_size=5,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    return gen_config, seed


# Step 8: Humanize with Ultra-Deep Transformations
def humanize_academic_text(ai_text, accent='usa'):
    """
    Ultra-deep humanization for <5% AI detection.

    Pipeline: build prompt -> generate -> strip the prompt echo ->
    deep_cleanup (remove AI vocabulary) -> apply_accent (regional style).
    Cleanup runs BEFORE the accent pass; the original order let
    deep_cleanup (e.g. 'amongst' -> 'among') undo UK vocabulary it had
    just applied.
    """
    academic_engine = get_ultra_deep_academic_engine(accent)
    gen_config, seed = create_ultra_humanized_config(ai_text, accent, tokenizer)

    print(f"Processing with {accent.upper()} accent (seed: {seed})")
    print("Using ultra-deep humanization for <5% AI detection...")

    prompt = f"""[INST] {academic_engine}

--- INPUT TEXT ---
{ai_text}
--- END INPUT ---

### REWRITTEN TEXT ({accent.upper()} ENGLISH): [/INST]"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True,
                       truncation=True, max_length=4096)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    print("Generating ultra-humanized academic text...")
    with torch.no_grad():
        outputs = model.generate(**inputs, generation_config=gen_config)

    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decode includes the prompt; keep only what follows the [/INST] tag.
    if "[/INST]" in full_output:
        humanized = full_output.split("[/INST]")[-1].strip()
    else:
        humanized = full_output.strip()

    humanized = deep_cleanup(humanized)
    humanized = apply_accent(humanized, accent)
    return humanized


def deep_cleanup(text):
    """
    Removes ALL AI vocabulary with SIMPLE human replacements.

    Whole-word, case-insensitive; leading capitalization is preserved
    (the original lowercased sentence-initial words, e.g. "Moreover" ->
    "also" instead of "Also").
    """
    forbidden_replacements = {
        'pivotal': 'important', 'paramount': 'important',
        'crucial': 'important', 'vital': 'key', 'essential': 'needed',
        'fundamental': 'basic', 'cornerstone': 'foundation',
        'foundational': 'basic', 'multifaceted': 'complex',
        'nuanced': 'detailed', 'intricate': 'complicated',
        'comprehensive': 'complete', 'holistic': 'whole',
        'delve': 'examine', 'leverage': 'use', 'utilize': 'use',
        'utilise': 'use', 'facilitate': 'help', 'enhance': 'improve',
        'foster': 'support', 'cultivate': 'develop', 'empower': 'enable',
        'robust': 'strong', 'rigorous': 'thorough', 'substantial': 'large',
        'significant': 'important', 'considerable': 'large',
        'notable': 'important', 'myriad': 'many', 'plethora': 'many',
        'array': 'range', 'spectrum': 'range', 'gamut': 'range',
        'breadth': 'range', 'endeavor': 'effort', 'endeavour': 'effort',
        'notion': 'idea', 'realm': 'area', 'facet': 'aspect',
        'dimension': 'part', 'paradigm': 'model', 'framework': 'structure',
        'landscape': 'field', 'trajectory': 'path', 'dynamic': 'changing',
        'optimal': 'best', 'viable': 'workable', 'integral': 'key',
        'inherent': 'natural', 'quintessential': 'typical',
        'ubiquitous': 'common', 'underscores': 'shows',
        'showcases': 'displays', 'exemplifies': 'shows',
        'encapsulates': 'captures', 'illuminates': 'reveals',
        'elucidates': 'explains', 'necessitates': 'requires',
        'encompasses': 'includes', 'embodies': 'represents',
        'perpetuates': 'continues', 'mitigates': 'reduces',
        'exacerbates': 'worsens', 'bolsters': 'supports',
        'galvanizes': 'energizes', 'transcends': 'goes beyond',
        'permeates': 'spreads through', 'resonates': 'connects',
        'navigating': 'dealing with', 'harnessing': 'using',
        'spearheading': 'leading', 'pioneering': 'starting',
        'orchestrating': 'organizing', 'attribute': 'quality',
        'capacity': 'ability', 'capability': 'ability',
        'proficiency': 'skill', 'competency': 'skill',
        'milieu': 'environment', 'domain': 'field', 'sphere': 'area',
        'arena': 'field', 'context': 'setting', 'inculcates': 'teaches',
        'instills': 'creates', 'engenders': 'creates',
        'fortifies': 'strengthens', 'practitioners': 'professionals',
        'individuals': 'people', 'steadfast': 'steady',
        'unwavering': 'constant', 'persistent': 'continuing',
        'diligent': 'careful', 'flourishes': 'grows',
        'blossoms': 'develops', 'dissipating': 'fading',
        'constraint': 'limit', 'empowerment': 'power',
        'deliberatively': 'deliberately', 'amidst': 'among',
        'amongst': 'among', 'brimming': 'full', 'waning': 'decreasing',
        'fleeting': 'brief', 'momentary': 'brief', 'surmount': 'overcome',
        'succumbing': 'giving in', 'propelling': 'pushing',
        'signifies': 'means', 'equips': 'prepares', 'dictate': 'control',
        'destiny': 'future', 'groundwork': 'foundation',
        'immense': 'large', 'significance': 'importance',
        'unconscious': 'automatic', 'procedures': 'processes',
        'cognitive': 'mental', 'strain': 'effort', 'beneficial': 'good',
        'customs': 'habits', 'detrimental': 'harmful',
        'insidiously': 'quietly', 'cumulative': 'combined',
        'devotion': 'commitment', 'preservation': 'keeping',
        'correlation': 'connection', 'anticipatibility': 'predictability',
        'consciously': 'deliberately', 'engineering': 'designing',
        'progressive': 'steady', 'evolution': 'progress',
        'profound': 'important', 'groundbreaking': 'new',
        'revolutionary': 'new', 'unprecedented': 'new',
        'transformative': 'changing', 'deeply': 'very', 'highly': 'very',
        'extremely': 'very', 'incredibly': 'very', 'remarkably': 'notably',
        'exceptionally': 'notably', 'extraordinarily': 'notably',
        'moreover': 'also', 'furthermore': 'also', 'additionally': 'also',
        'thus': 'so', 'hence': 'so', 'thereby': 'by doing this',
        'wherein': 'where', 'whereby': 'by which',
    }
    for forbidden, replacement in forbidden_replacements.items():
        text = _case_preserving_sub(
            r'\b' + re.escape(forbidden) + r'\b', replacement, text)
    return text


# Step 9: Post-Processing
def polish_academic_text(text):
    """
    Final polish for academic text.

    Expands common contractions (academic register avoids them), handling
    both lowercase and Capitalized occurrences.
    """
    contractions = {
        "don't": "do not", "doesn't": "does not", "didn't": "did not",
        "can't": "cannot", "couldn't": "could not",
        "wouldn't": "would not", "shouldn't": "should not",
        "won't": "will not", "isn't": "is not", "aren't": "are not",
        "wasn't": "was not", "weren't": "were not",
        "haven't": "have not", "hasn't": "has not", "hadn't": "had not",
        "it's": "it is", "that's": "that is", "there's": "there is",
    }
    for contraction, full_form in contractions.items():
        text = text.replace(contraction, full_form)
        text = text.replace(contraction.capitalize(), full_form.capitalize())
    return text


# ═══════════════════════════════════════════════════════════════════════════════
# GRADIO WEB INTERFACE
# ═══════════════════════════════════════════════════════════════════════════════

def convert_text_gradio(ai_text):
    """Gradio wrapper function: humanize + polish, with error reporting."""
    if not ai_text.strip():
        return "⚠️ Please enter some text to convert!"
    try:
        accent = "usa"
        humanized = humanize_academic_text(ai_text, accent)
        humanized = polish_academic_text(humanized)
        return humanized
    except Exception as e:
        # Surface the full traceback in the UI; a Space has no easy log access.
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}"


# Custom CSS
custom_css = """
.button-row { justify-content: center !important; }
.gradio-container { max-width: 1200px !important; margin: auto !important; }
"""

# Create Gradio Interface (Compatible with all Gradio versions)
demo = gr.Blocks()
with demo:
    gr.HTML("""
🎓 Academic Text Humanizer

Transform AI-Generated Text into Authentic Academic Writing
""")
    input_text = gr.Textbox(
        label="📝 AI-Generated Text",
        lines=10,
        placeholder="Paste your AI-generated text here...",
    )
    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear")
        submit_btn = gr.Button("✨ Humanize Text")
    output_text = gr.Textbox(label="✨ Humanized Academic Text", lines=12)

    submit_btn.click(
        fn=convert_text_gradio,
        inputs=input_text,
        outputs=output_text,
    )
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[input_text, output_text],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)