Frequency of a Substring in a String
Last Updated :
18 Apr, 2023
Given an input string and a pattern, the task is to count how many times the pattern occurs in the string. Overlapping occurrences are counted separately (see the third example below).
Examples:
Input: pattern = "man", string = "dhimanman"
Output: 2
Input: pattern = "nn", string = "Banana"
Output: 0
Input: pattern = "aa", string = "aaaaa"
Output: 4
Approach:
A simple solution is to slide the pattern over the text one position at a time, compare the characters one by one, and increment a counter whenever a complete match is found. This is the naive pattern-searching algorithm.
Below is the implementation of the above approach.
C++
// Simple C++ program to count occurrences
// of pat in txt.
#include <bits/stdc++.h>
using namespace std;
// Counts how many times pat occurs in txt using naive pattern matching.
// Overlapping occurrences are counted separately, e.g. "aa" occurs 4 times
// in "aaaaa".
//
// pat: the pattern to look for
// txt: the text to search in
// Returns the number of start positions in txt at which pat matches.
// An empty pattern matches at every position, so the result is then
// txt.length() + 1. A pattern longer than the text yields 0.
//
// Both arguments are taken by const reference: the function never modifies
// them, and this allows string literals and temporaries to be passed.
int countFreq(const string& pat, const string& txt)
{
    const int M = static_cast<int>(pat.length());
    const int N = static_cast<int>(txt.length());
    int res = 0;

    /* Slide pat over txt one position at a time. When M > N the bound
       N - M is negative and the loop body never runs. */
    for (int i = 0; i <= N - M; i++) {
        /* Advance j while the characters at the current alignment agree */
        int j = 0;
        while (j < M && txt[i + j] == pat[j])
            j++;

        // j reached M, so pat[0...M-1] == txt[i...i+M-1]
        if (j == M)
            res++;
    }
    return res;
}
/* Driver program to test above function */
/* Driver: prints the number of times "man" occurs in "dhimanman". */
int main()
{
    string pattern = "man";
    string text = "dhimanman";
    cout << countFreq(pattern, text);
    return 0;
}
Java
// Simple Java program to count occurrences
// of pat in txt.
class GFG {
    /**
     * Counts the occurrences of pat in txt with the naive sliding
     * comparison. Overlapping occurrences are each counted.
     *
     * @param pat pattern to look for
     * @param txt text to search in
     * @return number of start positions in txt at which pat matches
     */
    static int countFreq(String pat, String txt)
    {
        final int patLen = pat.length();
        final int lastStart = txt.length() - patLen;
        int total = 0;

        // Try every alignment of the pattern that still fits in the text
        for (int start = 0; start <= lastStart; start++) {
            if (matchesAt(pat, txt, start)) {
                total++;
            }
        }
        return total;
    }

    /* True when every character of pat equals the character of txt at
       the same offset from start. */
    private static boolean matchesAt(String pat, String txt, int start)
    {
        int k = 0;
        while (k < pat.length() && txt.charAt(start + k) == pat.charAt(k)) {
            k++;
        }
        return k == pat.length();
    }

    /* Driver: prints the count of "man" inside "dhimanman" */
    public static void main(String[] args)
    {
        String pattern = "man";
        String text = "dhimanman";
        System.out.println(countFreq(pattern, text));
    }
}
// This code is contributed by 29AjayKumar
Python3
# Simple python program to count
# occurrences of pat in txt.
def countFreq(pat, txt):
    """Count the occurrences of ``pat`` in ``txt`` by naive matching.

    Overlapping occurrences are counted separately, e.g. "aa" occurs
    4 times in "aaaaa".

    Args:
        pat: the pattern to look for.
        txt: the text to search in.

    Returns:
        The number of start positions in ``txt`` at which ``pat`` matches.
        An empty pattern matches at every position (``len(txt) + 1``);
        a pattern longer than the text yields 0.
    """
    M = len(pat)
    N = len(txt)
    res = 0

    # Slide the pattern over the text one position at a time
    for i in range(N - M + 1):
        # Advance j while the characters at this alignment agree
        j = 0
        while j < M and txt[i + j] == pat[j]:
            j += 1

        # j reached M, so the whole pattern matched at position i
        if j == M:
            res += 1
    return res
# Driver Code
if __name__ == '__main__':
    # Sample run: prints how many times pat occurs in txt
    txt = "dhimanman"
    pat = "man"
    print(countFreq(pat, txt))
# This code is contributed
# by PrinciRaj1992
C#
// Simple C# program to count occurrences
// of pat in txt.
using System;
public class GFG {
    // Counts the occurrences of pat in txt with the naive sliding
    // comparison. Overlapping occurrences are each counted.
    // Returns the number of start positions in txt where pat matches.
    static int countFreq(String pat, String txt)
    {
        int patLen = pat.Length;
        int lastStart = txt.Length - patLen;
        int total = 0;

        // Try every alignment of the pattern that still fits in the text
        for (int start = 0; start <= lastStart; start++) {
            int k = 0;
            while (k < patLen && txt[start + k] == pat[k]) {
                k++;
            }

            // k reached patLen, so the full pattern matched at `start`
            if (k == patLen) {
                total++;
            }
        }
        return total;
    }

    /* Driver: prints the count of "man" inside "dhimanman" */
    public static void Main()
    {
        String pattern = "man";
        String text = "dhimanman";
        Console.Write(countFreq(pattern, text));
    }
}
// This code is contributed by 29AjayKumar
PHP
<?php
// Simple PHP program to count occurrences
// of pat in txt.
/**
 * Counts the occurrences of $pat in $txt with the naive sliding
 * comparison. Overlapping occurrences are each counted.
 *
 * @param string $pat pattern to look for
 * @param string $txt text to search in
 * @return int number of start positions in $txt where $pat matches
 */
function countFreq($pat, $txt)
{
    $patLen = strlen($pat);
    $lastStart = strlen($txt) - $patLen;
    $count = 0;

    // Try every alignment of the pattern that still fits in the text
    for ($start = 0; $start <= $lastStart; $start++)
    {
        // Walk forward while the characters at this alignment agree
        $k = 0;
        while ($k < $patLen && $txt[$start + $k] == $pat[$k])
        {
            $k++;
        }

        // $k reached $patLen, so the full pattern matched at $start
        if ($k == $patLen)
        {
            $count++;
        }
    }
    return $count;
}
// Driver Code: echoes how many times $pat occurs in $txt.
// For this sample input the article's first example gives 2.
$txt = "dhimanman";
$pat = "man";
echo countFreq($pat, $txt);
// This code is contributed
// by Akanksha Rai
JavaScript
<script>
// JavaScript program to count occurrences
// of pat in txt.
// NOTE(review): `mod` is not referenced anywhere else in this script;
// it looks like a leftover - confirm before removing.
let mod = 100000007;
// Counts the occurrences of pat in txt with the naive sliding
// comparison. Overlapping occurrences are each counted.
// Returns the number of start positions in txt where pat matches.
function countFreq(pat, txt)
{
    const patLen = pat.length;
    const lastStart = txt.length - patLen;
    let count = 0;

    // Try every alignment of the pattern that still fits in the text
    for (let start = 0; start <= lastStart; start++)
    {
        // Walk forward while the characters at this alignment agree
        let k = 0;
        while (k < patLen && txt[start + k] == pat[k])
        {
            k++;
        }

        // k reached patLen, so the full pattern matched at `start`
        if (k == patLen)
        {
            count++;
        }
    }
    return count;
}
// Driver Code: writes to the document how many times pat occurs in txt.
// For this sample input the article's first example gives 2.
let txt = "dhimanman";
let pat = "man";
document.write(countFreq(pat, txt));
// This code is contributed by code_hunt
</script>
Time Complexity: O(M * N)
Auxiliary Space: O(1)
Efficient Approach:
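An efficient solution is to use the Knuth-Morris-Pratt (KMP) algorithm. It precomputes, for every prefix of the pattern, the length of the longest proper prefix that is also a suffix (the lps[] array). After a mismatch or a complete match, the search resumes from lps[j - 1] instead of restarting the pattern, so no text character is re-read and overlapping occurrences are still counted.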
Below is the implementation of the above approach.
C++
// C++ program to count occurrences
// of pattern in a text.
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Fills lps[0..M-1] for the pattern pat, where lps[i] is the length of the
// longest proper prefix of pat[0..i] that is also a suffix of pat[0..i].
//
// pat: the pattern
// M:   number of pattern characters to process (and the size of lps)
// lps: output array with room for at least M ints
//
// Does nothing when M <= 0, so a zero-length array is never written to.
void computeLPSArray(const string& pat, int M, int lps[])
{
    if (M <= 0)
        return;

    lps[0] = 0; // a single character has no proper prefix

    // Length of the previous longest prefix suffix
    int len = 0;
    int i = 1;

    // The loop calculates lps[i] for i = 1 to M-1
    while (i < M) {
        if (pat[i] == pat[len]) {
            // The current prefix-suffix extends by one character
            len++;
            lps[i] = len;
            i++;
        }
        else if (len != 0) {
            // Fall back to the next shorter prefix-suffix and retry the
            // same i (e.g. "AAACAAAA" at i = 7); i is not incremented.
            len = lps[len - 1];
        }
        else {
            // No prefix-suffix ends at i
            lps[i] = 0;
            i++;
        }
    }
}

// Counts how many times pat occurs in txt using the KMP algorithm.
// Overlapping occurrences are counted separately, e.g. "aa" occurs 4 times
// in "aaaaa".
//
// pat: the pattern to look for
// txt: the text to search in
// Returns the number of occurrences. An empty pattern is treated as
// matching at each of the txt.length() + 1 positions; a pattern longer
// than the text yields 0.
int KMPSearch(const string& pat, const string& txt)
{
    const int M = static_cast<int>(pat.length());
    const int N = static_cast<int>(txt.length());

    // Handle the degenerate cases up front so that lps is never empty
    if (M == 0)
        return N + 1;
    if (M > N)
        return 0;

    // lps[] holds the longest prefix suffix values for the pattern.
    // A vector is used because "int lps[M]" with a runtime M is not
    // standard C++.
    vector<int> lps(M);
    computeLPSArray(pat, M, lps.data());

    int i = 0; // index for txt[]
    int j = 0; // index for pat[]
    int res = 0;

    while (i < N) {
        if (pat[j] == txt[i]) {
            i++;
            j++;
            if (j == M) {
                // Full match. Continue from the longest border of the
                // pattern so that overlapping matches are also found.
                res++;
                j = lps[j - 1];
            }
        }
        else if (j != 0) {
            // Mismatch after j matches: the first lps[j-1] characters
            // are known to match, so do not compare them again.
            j = lps[j - 1];
        }
        else {
            // Mismatch on the first pattern character: advance the text
            i++;
        }
    }
    return res;
}
// Driver code
// Driver: counts "eeks" inside "geeksforgeeks" and prints the result.
int main()
{
    string pat = "eeks";
    string txt = "geeksforgeeks";
    cout << KMPSearch(pat, txt);
    return 0;
}
// This code is contributed by akhilsaini
Java
// Java program to count occurrences of pattern
// in a text.
class KMP_String_Matching {
    /**
     * Counts how many times pat occurs in txt using the KMP algorithm.
     * Overlapping occurrences are counted separately, e.g. "aba" occurs
     * twice in "ababa".
     *
     * @param pat pattern to look for
     * @param txt text to search in
     * @return number of occurrences; an empty pattern is treated as
     *         matching at each of the txt.length() + 1 positions
     */
    int KMPSearch(String pat, String txt)
    {
        int M = pat.length();
        int N = txt.length();

        // Handle the degenerate cases up front so that lps is never empty
        if (M == 0)
            return N + 1;
        if (M > N)
            return 0;

        // lps[] holds the longest prefix suffix values for the pattern
        int[] lps = new int[M];
        computeLPSArray(pat, M, lps);

        int i = 0; // index for txt[]
        int j = 0; // index for pat[]
        int res = 0;

        while (i < N) {
            if (pat.charAt(j) == txt.charAt(i)) {
                i++;
                j++;
                if (j == M) {
                    // Full match. Continue from the longest border of
                    // the pattern; i is never moved backwards, so every
                    // occurrence (overlapping or not) is counted once.
                    res++;
                    j = lps[j - 1];
                }
            }
            else if (j != 0) {
                // Mismatch after j matches: the first lps[j-1]
                // characters are known to match, skip comparing them.
                j = lps[j - 1];
            }
            else {
                // Mismatch on the first pattern character
                i++;
            }
        }
        return res;
    }

    /**
     * Fills lps[0..M-1], where lps[i] is the length of the longest proper
     * prefix of pat[0..i] that is also a suffix of pat[0..i].
     * Does nothing when M is not positive.
     *
     * @param pat the pattern
     * @param M   number of pattern characters to process
     * @param lps output array with room for at least M entries
     */
    void computeLPSArray(String pat, int M, int lps[])
    {
        if (M <= 0)
            return;

        lps[0] = 0; // a single character has no proper prefix

        // length of the previous longest prefix suffix
        int len = 0;
        int i = 1;

        // the loop calculates lps[i] for i = 1 to M-1
        while (i < M) {
            if (pat.charAt(i) == pat.charAt(len)) {
                len++;
                lps[i] = len;
                i++;
            }
            else if (len != 0) {
                // Fall back to the next shorter prefix-suffix and retry
                // the same i (e.g. "AAACAAAA" at i = 7).
                len = lps[len - 1];
            }
            else {
                lps[i] = 0;
                i++;
            }
        }
    }

    // Driver program: prints the count of "eeks" in "geeksforgeeks"
    public static void main(String args[])
    {
        String txt = "geeksforgeeks";
        String pat = "eeks";
        int ans = new KMP_String_Matching().KMPSearch(pat, txt);
        System.out.println(ans);
    }
}
Python3
# Python3 program to count occurrences of
# pattern in a text.
def KMPSearch(pat, txt):
    """Count the occurrences of ``pat`` in ``txt`` with the KMP algorithm.

    Overlapping occurrences are counted separately, e.g. "aba" occurs
    twice in "ababa".

    Args:
        pat: the pattern to look for.
        txt: the text to search in.

    Returns:
        The number of occurrences. An empty pattern is treated as matching
        at each of the ``len(txt) + 1`` positions; a pattern longer than
        the text yields 0.
    """
    M = len(pat)
    N = len(txt)

    # Handle the degenerate cases up front so that lps is never empty
    if M == 0:
        return N + 1
    if M > N:
        return 0

    # lps[] holds the longest prefix suffix values for the pattern
    lps = [0] * M
    computeLPSArray(pat, M, lps)

    i = 0  # index for txt[]
    j = 0  # index for pat[]
    res = 0

    while i < N:
        if pat[j] == txt[i]:
            i += 1
            j += 1
            if j == M:
                # Full match. Continue from the longest border of the
                # pattern; i is never moved backwards, so every
                # occurrence (overlapping or not) is counted once.
                res += 1
                j = lps[j - 1]
        elif j != 0:
            # Mismatch after j matches: the first lps[j - 1] characters
            # are known to match, so do not compare them again.
            j = lps[j - 1]
        else:
            # Mismatch on the first pattern character
            i += 1
    return res


def computeLPSArray(pat, M, lps):
    """Fill ``lps[0..M-1]`` for the pattern ``pat``.

    ``lps[i]`` is the length of the longest proper prefix of
    ``pat[0..i]`` that is also a suffix of ``pat[0..i]``.

    Args:
        pat: the pattern.
        M: number of pattern characters to process.
        lps: list with at least ``M`` slots; it is modified in place.

    Does nothing when ``M`` is not positive.
    """
    if M <= 0:
        return

    lps[0] = 0  # a single character has no proper prefix

    # Length of the previous longest prefix suffix
    length = 0
    i = 1

    # The loop calculates lps[i] for i = 1 to M-1
    while i < M:
        if pat[i] == pat[length]:
            length += 1
            lps[i] = length
            i += 1
        elif length != 0:
            # Fall back to the next shorter prefix-suffix and retry the
            # same i (e.g. "AAACAAAA" at i = 7); i is not incremented.
            length = lps[length - 1]
        else:
            lps[i] = 0
            i += 1
# Driver code
if __name__ == "__main__":
    # Sample run: prints how many times pat occurs in txt
    txt = "geeksforgeeks"
    pat = "eeks"
    ans = KMPSearch(pat, txt)
    print(ans)
# This code is contributed by akhilsaini
C#
// C# program to count occurrences of pattern
// in a text.
using System;
public class KMP_String_Matching {
    // Counts how many times pat occurs in txt using the KMP algorithm.
    // Overlapping occurrences are counted separately, e.g. "aba" occurs
    // twice in "ababa".
    // An empty pattern is treated as matching at each of the
    // txt.Length + 1 positions; a pattern longer than the text yields 0.
    int KMPSearch(String pat, String txt)
    {
        int M = pat.Length;
        int N = txt.Length;

        // Handle the degenerate cases up front so that lps is never empty
        if (M == 0)
            return N + 1;
        if (M > N)
            return 0;

        // lps[] holds the longest prefix suffix values for the pattern
        int[] lps = new int[M];
        computeLPSArray(pat, M, lps);

        int i = 0; // index for txt[]
        int j = 0; // index for pat[]
        int res = 0;

        while (i < N) {
            if (pat[j] == txt[i]) {
                i++;
                j++;
                if (j == M) {
                    // Full match. Continue from the longest border of
                    // the pattern; i is never moved backwards, so every
                    // occurrence (overlapping or not) is counted once.
                    res++;
                    j = lps[j - 1];
                }
            }
            else if (j != 0) {
                // Mismatch after j matches: the first lps[j-1]
                // characters are known to match, skip comparing them.
                j = lps[j - 1];
            }
            else {
                // Mismatch on the first pattern character
                i++;
            }
        }
        return res;
    }

    // Fills lps[0..M-1], where lps[i] is the length of the longest proper
    // prefix of pat[0..i] that is also a suffix of pat[0..i].
    // Does nothing when M is not positive.
    void computeLPSArray(String pat, int M, int[] lps)
    {
        if (M <= 0)
            return;

        lps[0] = 0; // a single character has no proper prefix

        // length of the previous longest prefix suffix
        int len = 0;
        int i = 1;

        // the loop calculates lps[i] for i = 1 to M-1
        while (i < M) {
            if (pat[i] == pat[len]) {
                len++;
                lps[i] = len;
                i++;
            }
            else if (len != 0) {
                // Fall back to the next shorter prefix-suffix and retry
                // the same i (e.g. "AAACAAAA" at i = 7).
                len = lps[len - 1];
            }
            else {
                lps[i] = 0;
                i++;
            }
        }
    }

    // Driver code: prints the count of "eeks" in "geeksforgeeks"
    public static void Main(String[] args)
    {
        String txt = "geeksforgeeks";
        String pat = "eeks";
        int ans = new KMP_String_Matching().KMPSearch(pat, txt);
        Console.WriteLine(ans);
    }
}
// This code is contributed by Princi Singh
JavaScript
<script>
// JavaScript program to count occurrences
// of pattern in a text.
// Fills lps[0..M-1] for the pattern pat, where lps[i] is the length of the
// longest proper prefix of pat[0..i] that is also a suffix of pat[0..i].
//
// pat: the pattern
// M:   number of pattern characters to process
// lps: array that receives the values (modified in place)
//
// Does nothing when M is not positive.
function computeLPSArray(pat, M, lps)
{
    if (M <= 0)
    {
        return;
    }

    lps[0] = 0; // a single character has no proper prefix

    // Length of the previous longest prefix suffix
    let len = 0;
    let i = 1;

    // The loop calculates lps[i] for i = 1 to M-1
    while (i < M)
    {
        if (pat[i] == pat[len])
        {
            len++;
            lps[i] = len;
            i++;
        }
        else if (len != 0)
        {
            // Fall back to the next shorter prefix-suffix and retry the
            // same i (e.g. "AAACAAAA" at i = 7); i is not incremented.
            len = lps[len - 1];
        }
        else
        {
            lps[i] = 0;
            i++;
        }
    }
}

// Counts how many times pat occurs in txt using the KMP algorithm.
// Overlapping occurrences are counted separately, e.g. "aba" occurs twice
// in "ababa".
//
// Returns the number of occurrences. An empty pattern is treated as
// matching at each of the txt.length + 1 positions; a pattern longer than
// the text yields 0.
function KMPSearch(pat, txt)
{
    let M = pat.length;
    let N = txt.length;

    // Handle the degenerate cases up front so that lps is never empty
    if (M == 0)
    {
        return N + 1;
    }
    if (M > N)
    {
        return 0;
    }

    // lps[] holds the longest prefix suffix values for the pattern
    let lps = new Array(M).fill(0);
    computeLPSArray(pat, M, lps);

    let i = 0; // index for txt[]
    let j = 0; // index for pat[]
    let res = 0;

    while (i < N)
    {
        if (pat[j] == txt[i])
        {
            i++;
            j++;
            if (j == M)
            {
                // Full match. Continue from the longest border of the
                // pattern; i is never moved backwards, so every
                // occurrence (overlapping or not) is counted once.
                res++;
                j = lps[j - 1];
            }
        }
        else if (j != 0)
        {
            // Mismatch after j matches: the first lps[j-1] characters
            // are known to match, so do not compare them again.
            j = lps[j - 1];
        }
        else
        {
            // Mismatch on the first pattern character
            i++;
        }
    }
    return res;
}
// Driver code: runs KMPSearch on a sample text and pattern and writes the
// resulting count to the document.
let txt = "geeksforgeeks";
let pat = "eeks";
let ans = KMPSearch(pat, txt);
document.write(ans);
</script>
Time Complexity: O(M + N)
Auxiliary Space: O(M), since an array of size M is used to store the longest prefix suffix values for the pattern.
Similar Reads
Frequency of a substring in a string using pthread Given an input string and a substring. Find the frequency of occurrences of a substring in the given string using pthreads. Examples: Input: string = "man" substring = "dhimanman"Output: 2Input: string = "banana" substring = "nn"Output: 0Note: It is advised to execute the program in Linux based syst
6 min read
Frequency of a string in an array of strings You are given a collection of strings and a list of queries. For every query there is a string given. We need to print the number of times the given string occurs in the collection of strings. Examples: Input : arr[] = {wer, wer, tyu, oio, tyu} q[] = {wer, tyu, uio}Output : 2 2 0Explanation : q[0] a
15 min read
Number of substrings of a string Find total number of non-empty substrings of a string with N characters. Input : str = "abc" Output : 6 Every substring of the given string : "a", "b", "c", "ab", "bc", "abc" Input : str = "abcd" Output : 10 Every substring of the given string : "a", "b", "c", "d", "ab", "bc", "cd", "abc", "bcd" and
3 min read
Queries for frequencies of characters in substrings Given a string s and Q number of queries. Each query Q consists of l and r and a character c. Find the frequency of character c in substring l to r. Examples: Input : s = geeksforgeeks 4 0 5 e 2 6 f 4 7 m 0 12 e Output : 2 1 0 4 Substring from 0 to 5 is geeksf. Here e occurs 2 times. Input : s = app
6 min read
Find frequency of all characters across all substrings of given string Given a string S containing all lowercase characters and its length N. Find frequency of all characters across all substrings of the given string. Examples: Input: N = 3, S = "aba"Output: a 6b 4Explanation: The substrings are: a, b, a, ab, ba, aba. The frequency of each character: a = 6, b = 4. Henc
4 min read
Find frequency of each word in a string in Python Write a python code to find the frequency of each word in a given string. Examples: Input : str[] = "Apple Mango Orange Mango Guava Guava Mango" Output : frequency of Apple is : 1 frequency of Mango is : 3 frequency of Orange is : 1 frequency of Guava is : 2 Input : str = "Train Bus Bus Train Taxi A
7 min read
Count of Superstrings in a given array of strings Given 2 array of strings X and Y, the task is to find the number of superstrings in X. A string s is said to be a Superstring, if each string present in array Y is a subsequence of string s . Examples: Input: X = {"ceo", "alco", "caaeio", "ceai"}, Y = {"ec", "oc", "ceo"}Output: 2Explanation: Strings
8 min read
Maximum length substring with highest frequency in a string Given a string. The task is to find the maximum occurred substring with a maximum length. These occurrences can overlap. Examples: Input: str = "abab" Output: ab "a", "b", "ab" are occur 2 times. But, "ab" has maximum length Input: str = "abcd" Output: a Approach: The idea is to store the frequency
5 min read
Find the longest Substring of a given String S Given a string S of length, N. Find the maximum length of any substring of S such that, the bitwise OR of all the characters of the substring is equal to the bitwise OR of the remaining characters of the string. If no such substring exists, print -1. Examples: Input: S = "2347"Output: 3?Explanation:
10 min read
Count of substrings of given string with frequency of each character at most K Given a string str, the task is to calculate the number of substrings of the given string such that the frequency of each element of the string is almost K. Examples: Input: str = "abab", K = 1Output: 7Explanation: The substrings such that the frequency of each character is atmost 1 are "a", "b", "a
6 min read