Context-free grammars (CFGs) can be used as a model for natural language. That means they capture the structure of language in a relatively simple form that can be reasoned about and used to generate new instances of natural language. My natural language processing book says CFGs were proven insufficient for modeling German spoken in Zurich (Speech and Language Processing (2nd Edition), p.538), but they’re good enough to bring you the following silly, random, grammatical sentences:

/**
 * Returns a random integer between the two bounds, both inclusive.
 *
 * Every integer in the range is equally likely. Scaling by (range) and
 * rounding would give the two endpoints only half the probability of the
 * interior values, so the range is widened by one and floored instead.
 *
 * @param {number} low_bound_inclusive - smallest integer that may be returned
 * @param {number} high_bound_inclusive - largest integer that may be returned
 * @returns {number} an integer r with low_bound_inclusive <= r <= high_bound_inclusive
 */
function randInt(low_bound_inclusive, high_bound_inclusive) {
  // Number of distinct integers in the closed interval.
  var count = high_bound_inclusive - low_bound_inclusive + 1;
  // Math.random() is in [0, 1), so the floor is in [0, count - 1].
  return low_bound_inclusive + Math.floor(Math.random() * count);
}
// Terminal vocabularies. Each list holds the words a single lexical
// category may expand to; they are declared explicitly so they do not
// rely on implicit global creation.
var proper_nouns = [
  'John',
  'Mary'
];
var nominals = [
  'fish',
  'cat',
  'dog',
  'zebra',
  'horse',
  'donkey',
  'giraffe',
  'walrus'
];
var determiners = [
  'the',
  'a'
];
// Multi-word entries (e.g. 'dragged in') are emitted as a single unit.
var past_tense_verbs = [
  'ate',
  'dragged in',
  'befriended',
  'saluted'
];
// cfg maps each non-terminal symbol to the list of expansions it can have.
// An expansion is a space-separated string of symbols; any symbol that is
// not listed as a non-terminal is emitted literally (e.g. 'that').
var cfg = {
  'S': ['NP VP'],
  'NP': ['DN', 'PN'],
  // The second rule nests an NP inside a relative clause, which is what
  // makes the grammar recursive.
  'DN': ['DET N', 'DET N that NP V'],
  'PN': proper_nouns,
  'VP': ['V NP'],
  'V': past_tense_verbs,
  'DET': determiners,
  'N': nominals
};
// Symbols that are expanded through the grammar rather than emitted
// literally. Any token not in this list is treated as a terminal.
var nonterminals = [
  'S',
  'PN',
  'VP',
  'V',
  'NP',
  'DET',
  'N',
  'DN'
];
/**
 * Expands a space-separated string of grammar symbols into a string that
 * contains only terminal symbols.
 *
 * Each token that appears in `nonterminals` is replaced by the recursive
 * expansion of one of its rules in `cfg`, chosen at random with `randInt`.
 * Tokens that are not non-terminals are kept unchanged.
 *
 * Expansion is done token by token and the results are joined, rather than
 * by substring replacement on the whole string. Substring replacement would
 * hit the first textual match, which can be inside an already-expanded
 * terminal (e.g. the 'V' in a name such as 'Vera'), and would also
 * interpret '$' sequences in the replacement text.
 *
 * @param {string} g - space-separated grammar symbols, e.g. 'S' or 'NP VP'
 * @returns {string} the fully expanded, space-separated terminal string
 */
function generate(g) {
  var tokens = g.split(' ');
  var expanded = tokens.map(function (token) {
    if (nonterminals.indexOf(token) === -1) {
      // Not a non-terminal, so the token is a terminal symbol.
      return token;
    }
    // Select one of the possible expansions of this token to use.
    var expansions = cfg[token];
    var expansion = expansions[randInt(0, expansions.length - 1)];
    return generate(expansion);
  });
  // All non-terminal symbols have been replaced by terminal ones.
  return expanded.join(' ');
}
// Write five randomly generated sentences into the page as a bulleted list.
// NOTE(review): the original markup inside these strings was lost; the
// <ul>/<li> structure is reconstructed from the bullets that remained.
// Confirm against the published page source.
document.writeln('<ul>');
for (var i = 0; i < 5; i++) {
  var sentence = generate('S');
  document.writeln('<li>' + sentence + '</li>');
}
document.writeln('</ul>');

They may not make sense. They may be hard to understand. But they should be grammatical.

Refresh to see more. View source to see how it’s done.