#include <iostream>
#include <string>
#include <vector>
#include <cctype>
// One occurrence of the searched word, together with its surroundings.
//
// Members are declared in the order used for aggregate initialization
// ({word, start, end, context}); keep that order when adding fields.
// The index members are zero-initialized by default so that a
// default-constructed Context never holds indeterminate values.
struct Context {
    std::string word;    // the matched word, in its original letter case
    size_t start = 0;    // index of the word's first character in the source text
    size_t end = 0;      // index of the word's last character (inclusive)
    std::string context; // neighbouring words + the word itself, space-separated
};
// Return a lowercase copy of `s`, used for case-insensitive comparison.
//
// The conversion is done byte by byte with std::tolower, so it follows the
// current C locale; bytes that have no lowercase form are copied unchanged.
std::string toLower(const std::string &s) {
    std::string out;
    out.reserve(s.size());
    for (char c : s) {
        // std::tolower requires a value representable as unsigned char (or
        // EOF). Passing a negative plain char is undefined behaviour, so go
        // through unsigned char first.
        out.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));
    }
    return out;
}
// Decide whether `c` may be part of a word.
// Only characters that std::isalpha classifies as letters count; everything
// else (digits, whitespace, punctuation, ...) acts as a separator.
bool isWordChar(char c) {
    // <cctype> classifiers expect an unsigned char value.
    const unsigned char code = static_cast<unsigned char>(c);
    return std::isalpha(code) != 0;
}
// Split `text` into words and remember where each one was found.
//
// A word is a maximal run of characters accepted by isWordChar; all other
// characters only separate words and are dropped.
//
// Returns one entry per word, in order of appearance:
//   .first          - the word as it appears in the text
//   .second.first   - index of its first character
//   .second.second  - index of its last character (inclusive)
std::vector<std::pair<std::string, std::pair<size_t,size_t>>> tokenize(const std::string &text) {
    std::vector<std::pair<std::string, std::pair<size_t,size_t>>> tokens;
    const size_t length = text.size();
    size_t pos = 0;

    for (;;) {
        // Advance past separators until a word starts or the text ends.
        while (pos < length && !isWordChar(text[pos])) {
            ++pos;
        }
        if (pos >= length) {
            return tokens;
        }

        // text[pos] is a word character here; consume the whole run.
        const size_t first = pos;
        do {
            ++pos;
        } while (pos < length && isWordChar(text[pos]));
        const size_t last = pos - 1;

        tokens.push_back({ text.substr(first, last - first + 1), {first, last} });
    }
}
// Build the context window around words[index]: up to `windowSize` words
// before it, the word itself, and up to `windowSize` words after it, joined
// by single spaces. The window is clipped at both ends of the word list.
//
// Parameters:
//   words      - token list; only the .first (word text) of each entry is used
//   index      - position of the target word inside `words`
//   windowSize - maximum number of neighbouring words taken on each side
//
// Returns the joined window, or an empty string when `index` does not refer
// to an element of `words` (which includes the case of an empty list).
std::string buildContext(const std::vector<std::pair<std::string,std::pair<size_t,size_t>>> &words,
                         size_t index,
                         size_t windowSize)
{
    // Nothing sensible can be built for an out-of-range index; this guard also
    // keeps words.size() - 1 below from wrapping around on an empty list.
    if (index >= words.size()) {
        return std::string();
    }

    // Keep every bound in size_t and order the arithmetic so that neither the
    // subtraction nor the addition can wrap, even for a huge windowSize.
    const size_t last = words.size() - 1;
    const size_t first = (index >= windowSize) ? index - windowSize : 0;
    const size_t stop = (windowSize >= last - index) ? last : index + windowSize;

    std::string out;
    for (size_t i = first; i <= stop; ++i) {
        if (i != first) {
            out += ' ';
        }
        out += words[i].first;
    }
    return out;
}
// Find all occurrences of the target word in the text
// Returns:
// - the matched word
// - its index range
// - the N-word context window
std::vector<Context> findWordContexts(const std::string &text,
const std::string &targetWord,
size_t windowSize)
{
std::vector<Context> results;
// Tokenize the text into words with positions
auto words = tokenize(text);
std::string lowerTarget = toLower(targetWord);
// Scan through all words
for (size_t i = 0; i < words.size(); i++) {
std::string wLower = toLower(words[i].first);
// Case-insensitive match
if (wLower == lowerTarget) {
results.push_back({
words[i].first, // original-case word
words[i].second.first, // start index
words[i].second.second, // end index
buildContext(words, i, windowSize) // dynamic N-word context window
});
}
}
return results;
}
int main() {
    // Sample passage: five lines, each mentioning the search word once.
    const std::string passage =
        "Programming often begins with understanding how data flows through a system and how each component transforms it.\n"
        "In many cases, programming requires careful attention to algorithmic complexity, especially when performance constraints are tight.\n"
        "Developers rely on modular design so that programming tasks can be isolated, tested, and optimized independently.\n"
        "Even when the codebase is stable, programming practices like refactoring and static analysis help maintain long-term reliability.\n"
        "Ultimately, the discipline of programming is as much about designing predictable behavior as it is about writing instructions for a machine.";
    const std::string needle = "programming";
    const size_t radius = 4; // how many words to show on each side of a match

    // Print each occurrence with its character range and its neighbours.
    for (const auto &hit : findWordContexts(passage, needle, radius)) {
        std::cout << "Found: \"" << hit.word << "\" at [" << hit.start << ", " << hit.end << "]\n"
                  << "Context: " << hit.context << "\n\n";
    }
    return 0;
}
/*
run:
Found: "Programming" at [0, 10]
Context: Programming often begins with understanding
Found: "programming" at [129, 139]
Context: it In many cases programming requires careful attention to
Found: "programming" at [288, 298]
Context: modular design so that programming tasks can be isolated
Found: "programming" at [394, 404]
Context: the codebase is stable programming practices like refactoring and
Found: "programming" at [520, 530]
Context: Ultimately the discipline of programming is as much about
*/