How to find the context of a word in text with C++

4 Answers

0 votes
/*
    You can find the context of a word in a text by searching for the word 
    and then extracting the words around it — for example, the whole sentence, 
    or N words before and after it.
*/

// Extract a whole sentence

#include <iostream>
#include <string>

// Returns a copy of `s` with leading and trailing spaces, tabs and newlines
// removed. A string made only of those characters (or an empty string)
// yields "". Other whitespace such as '\r' is left untouched.
std::string trim(const std::string& s) {
    const char* const blanks = " \t\n";

    const auto first = s.find_first_not_of(blanks);
    if (first == std::string::npos) {
        return std::string();
    }

    // A non-blank character exists, so this search cannot fail.
    const auto last = s.find_last_not_of(blanks);
    const auto length = last - first + 1;
    return s.substr(first, length);
}

// Returns the sentence of `text` that contains the first occurrence of
// `word`, with surrounding whitespace trimmed and without the terminating
// punctuation mark.
//
// - The search is a case-sensitive substring search (std::string::find),
//   so "text" is also found inside "context".
// - A sentence is delimited by '.', '!' or '?', or by the start/end of
//   `text` when no delimiter is present on that side.
// - Returns "" when `word` is empty or does not occur in `text`.
std::string getSentenceContext(const std::string& text, const std::string& word) {
    // find("") succeeds at position 0, which would wrongly report the first
    // sentence as a match for "nothing".
    if (word.empty()) return "";

    const auto pos = text.find(word);
    if (pos == std::string::npos) return "";

    const char* const terminators = ".!?";

    // Last terminator at or before the match; the sentence starts right after it.
    const auto before = text.find_last_of(terminators, pos);
    const auto start = (before == std::string::npos) ? 0 : before + 1;

    // First terminator at or after the match; the sentence stops just before it.
    const auto after = text.find_first_of(terminators, pos);
    const auto end = (after == std::string::npos) ? text.size() : after;

    return trim(text.substr(start, end - start));
}

// Demo: prints the sentence that contains the first occurrence of "context".
int main() {
    const std::string needle = "context";

    const std::string passage =
        "C++ is powerful. "
        "Finding context of a word is useful. "
        "Context helps understanding.";

    const std::string sentence = getSentenceContext(passage, needle);
    std::cout << sentence << "\n";
}



/*
run:

Finding context of a word is useful

*/

 



answered May 7 by avibootz
edited May 7 by avibootz
0 votes
/*
    You can find the context of a word in a text by searching for the word 
    and then extracting the words around it — for example, the whole sentence, 
    or N words before and after it.
*/

// Extract N words before and after the target word

#include <iostream>
#include <sstream>
#include <vector>
#include <algorithm>

// Returns the first whitespace-separated token of `text` that matches
// `target`, together with up to `window` tokens on each side, joined by
// single spaces (no trailing space). Returns "" when nothing matches or
// when `target` is empty.
//
// Matching is case-sensitive. A token also matches when it equals `target`
// once its leading/trailing punctuation is stripped, so "context," and
// "(context)" are found when searching for "context". Tokens in the result
// keep their original punctuation. A negative `window` is treated as 0.
std::string getWordContext(const std::string& text, const std::string& target, int window = 3) {
    if (target.empty()) return "";

    std::istringstream iss(text);
    std::vector<std::string> words;
    std::string w;
    while (iss >> w) words.push_back(w);

    // Work in unsigned index space: no signed/unsigned comparisons and no
    // int overflow when a very large window is requested.
    const std::size_t radius = window > 0 ? static_cast<std::size_t>(window) : 0;

    const std::string punct = ".,;:!?\"'()[]{}";
    const auto matches = [&](const std::string& token) {
        if (token == target) return true;
        const std::size_t first = token.find_first_not_of(punct);
        if (first == std::string::npos) return false;   // token is punctuation only
        const std::size_t last = token.find_last_not_of(punct);
        return token.compare(first, last - first + 1, target) == 0;
    };

    for (std::size_t i = 0; i < words.size(); ++i) {
        if (!matches(words[i])) continue;

        const std::size_t lastIndex = words.size() - 1;
        const std::size_t first = i > radius ? i - radius : 0;
        // Compare distances instead of computing i + radius first, so the
        // addition can never wrap around.
        const std::size_t last = (lastIndex - i > radius) ? i + radius : lastIndex;

        std::string result;
        for (std::size_t j = first; j <= last; ++j) {
            if (j > first) result += ' ';
            result += words[j];
        }
        return result;
    }
    return "";
}

// Demo: prints the first occurrence of "context" with three words on each side.
int main() {
    const std::string word = "context";
    const std::string text =
            "C++ allows you to "
            "find the context of a word inside "
            "a long text easily.";

    const std::string surrounding = getWordContext(text, word, 3);
    std::cout << surrounding << "\n";
}



/*
run:

to find the context of a word 

*/

 



answered May 7 by avibootz
0 votes
/*
    You can find the context of a word in a text by searching for the word 
    and then extracting the words around it — for example, the whole sentence, 
    or N words before and after it.
*/

// Find all occurrences with context

#include <iostream>
#include <sstream>
#include <vector>
#include <string>
#include <algorithm>

// ------------------------------------------------------------
// findAllContexts()
// Splits `text` on whitespace and returns one context string for
// every token that matches `word`, in order of appearance.
// Each context holds up to `window` tokens before and after the
// match, joined by single spaces (no trailing space).
//
// Matching is case-sensitive. A token also matches when it equals
// `word` after its leading/trailing punctuation is stripped, so
// "context." and "(context)" count as occurrences of "context".
// Tokens in the output keep their original punctuation.
// An empty `word` yields no contexts; a negative `window` is
// treated as 0.
// ------------------------------------------------------------
std::vector<std::string> findAllContexts(const std::string& text,
                                         const std::string& word,
                                         int window = 3)
{
    std::vector<std::string> contexts;   // stores all found contexts
    if (word.empty()) return contexts;

    // Split text into individual whitespace-separated tokens
    std::istringstream iss(text);
    std::vector<std::string> words;
    std::string w;
    while (iss >> w)
        words.push_back(w);

    // Unsigned window size: avoids int overflow in the index arithmetic
    const std::size_t radius = window > 0 ? static_cast<std::size_t>(window) : 0;

    const std::string punct = ".,;:!?\"'()[]{}";
    const auto matches = [&](const std::string& token) {
        if (token == word) return true;
        const std::size_t first = token.find_first_not_of(punct);
        if (first == std::string::npos) return false;   // punctuation only
        const std::size_t last = token.find_last_not_of(punct);
        return token.compare(first, last - first + 1, word) == 0;
    };

    for (std::size_t i = 0; i < words.size(); ++i) {
        if (!matches(words[i])) continue;

        // Clamp the window to the token list without ever computing an
        // index that could wrap around.
        const std::size_t lastIndex = words.size() - 1;
        const std::size_t first = i > radius ? i - radius : 0;
        const std::size_t last = (lastIndex - i > radius) ? i + radius : lastIndex;

        // Build the context string, separating tokens with one space
        std::string ctx;
        for (std::size_t j = first; j <= last; ++j) {
            if (j > first) ctx += ' ';
            ctx += words[j];
        }
        contexts.push_back(ctx);
    }

    return contexts;
}

// ------------------------------------------------------------
// Demo: lists every context of "context" in a sample text,
// using a window of 3 words on each side.
// ------------------------------------------------------------
int main() {
    const std::string target = "context";

    const std::string text =
        "C++ allows you to find the context of a word inside a long text. "
        "Finding context is useful when analyzing documents. "
        "Sometimes the same word appears many times, and each context matters.";

    const std::vector<std::string> results = findAllContexts(text, target, 3);

    // One bullet line per context found
    std::cout << "Contexts found:\n\n";
    for (std::vector<std::string>::size_type k = 0; k < results.size(); ++k) {
        std::cout << "- " << results[k] << "\n";
    }
}



/*
run:

Contexts found:

- to find the context of a word 
- long text. Finding context is useful when 
- times, and each context matters. 

*/

 



answered May 7 by avibootz
0 votes
#include <iostream>
#include <string>
#include <vector>
#include <cctype>

// One match of the target word inside the searched text.
// findWordContexts() fills this struct with a braced initializer list,
// so the members are assigned in declaration order — keep the order stable.
struct Context {
    // The matched word exactly as it appears in the text (original case).
    std::string word;
    // Index of the word's first character in the original text.
    size_t start;
    // Index of the word's last character in the original text (inclusive).
    size_t end;
    // The match with up to N words before and after it, space-separated.
    std::string context;
};

// Returns a lowercase copy of `s`, used for case-insensitive comparison.
// Conversion is per byte via std::tolower in the current C locale, so
// multi-byte encodings such as UTF-8 are not case-folded.
std::string toLower(const std::string &s) {
    std::string out;
    out.reserve(s.size());
    for (char c : s) {
        // std::tolower has undefined behaviour for negative values other
        // than EOF, so convert through unsigned char first; this matters
        // for non-ASCII bytes where plain char is signed.
        const int lowered = std::tolower(static_cast<unsigned char>(c));
        out.push_back(static_cast<char>(lowered));
    }
    return out;
}

// Word-character predicate: true only for alphabetic characters, so
// digits, whitespace and punctuation are all treated as separators.
bool isWordChar(char c) {
    // std::isalpha expects a value representable as unsigned char.
    const unsigned char asUnsigned = static_cast<unsigned char>(c);
    return std::isalpha(asUnsigned) != 0;
}

// Splits `text` into maximal runs of word characters (see isWordChar).
// Every entry of the result is { word, { first, last } } where `first` and
// `last` are the inclusive character indices of the word in `text`.
// Everything that is not a word character only separates words.
std::vector<std::pair<std::string, std::pair<size_t,size_t>>> tokenize(const std::string &text) {
    std::vector<std::pair<std::string, std::pair<size_t,size_t>>> tokens;

    const size_t length = text.size();
    size_t pos = 0;

    while (pos < length) {
        // Separator: step over it and look again.
        if (!isWordChar(text[pos])) {
            ++pos;
            continue;
        }

        // `pos` sits on the first character of a word; advance to one past its end.
        const size_t first = pos;
        do {
            ++pos;
        } while (pos < length && isWordChar(text[pos]));

        const size_t last = pos - 1;
        tokens.push_back({ text.substr(first, pos - first), {first, last} });
    }

    return tokens;
}

// Builds the context window around words[index]: up to `windowSize` words
// before it, the word itself, and up to `windowSize` words after it, joined
// by single spaces. The window is clamped to the bounds of `words`.
// Returns "" when `index` is not a valid position (including empty `words`).
std::string buildContext(const std::vector<std::pair<std::string,std::pair<size_t,size_t>>> &words,
                         size_t index,
                         size_t windowSize)
{
    std::string out;

    // Also covers the empty vector, where words.size() - 1 would wrap around.
    if (index >= words.size()) return out;

    // All arithmetic stays in size_t; distances are compared before adding
    // so that a huge windowSize cannot overflow index + windowSize.
    const size_t lastIndex = words.size() - 1;
    const size_t first = (index >= windowSize) ? index - windowSize : 0;
    const size_t last = (lastIndex - index > windowSize) ? index + windowSize : lastIndex;

    // Concatenate the words in the window
    for (size_t i = first; i <= last; ++i) {
        if (i > first) out += ' ';
        out += words[i].first;
    }

    return out;
}

// Find all occurrences of the target word in the text
// Returns:
//   - the matched word
//   - its index range
//   - the N-word context window
std::vector<Context> findWordContexts(const std::string &text,
                                      const std::string &targetWord,
                                      size_t windowSize)
{
    std::vector<Context> results;

    // Tokenize the text into words with positions
    auto words = tokenize(text);
    std::string lowerTarget = toLower(targetWord);

    // Scan through all words
    for (size_t i = 0; i < words.size(); i++) {
        std::string wLower = toLower(words[i].first);

        // Case-insensitive match
        if (wLower == lowerTarget) {
            results.push_back({
                words[i].first,               // original-case word
                words[i].second.first,        // start index
                words[i].second.second,       // end index
                buildContext(words, i, windowSize)  // dynamic N-word context window
            });
        }
    }

    return results;
}

int main() {
    // Multi-line text provided by you
    std::string text =
        "Programming often begins with understanding how data flows through a system and how each component transforms it.\n"
        "In many cases, programming requires careful attention to algorithmic complexity, especially when performance constraints are tight.\n"
        "Developers rely on modular design so that programming tasks can be isolated, tested, and optimized independently.\n"
        "Even when the codebase is stable, programming practices like refactoring and static analysis help maintain long-term reliability.\n"
        "Ultimately, the discipline of programming is as much about designing predictable behavior as it is about writing instructions for a machine.";

    std::string target = "programming";

    size_t windowSize = 4; // <-- dynamic number of words before/after

    // Find all contexts
    auto contexts = findWordContexts(text, target, windowSize);

    // Display results
    for (const auto &ctx : contexts) {
        std::cout << "Found: \"" << ctx.word << "\" at ["
                  << ctx.start << ", " << ctx.end << "]\n";
        std::cout << "Context: " << ctx.context << "\n\n";
    }
}



/*
run:

Found: "Programming" at [0, 10]
Context: Programming often begins with understanding

Found: "programming" at [129, 139]
Context: it In many cases programming requires careful attention to

Found: "programming" at [288, 298]
Context: modular design so that programming tasks can be isolated

Found: "programming" at [394, 404]
Context: the codebase is stable programming practices like refactoring and

Found: "programming" at [520, 530]
Context: Ultimately the discipline of programming is as much about

*/

 



answered May 7 by avibootz

Related questions

1 answer 182 views
1 answer 244 views
2 answers 196 views
2 answers 190 views
1 answer 143 views
1 answer 194 views
194 views asked Jun 15, 2018 by avibootz
...